{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt \n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "读入数据并进行合并处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df_train = pd.read_csv('./data/train.csv')\n",
    "df_test = pd.read_csv('./data/test.csv')\n",
    "df_songs=pd.read_csv('./data/songs.csv')#歌曲元数据信息\n",
    "df_members=pd.read_csv('./data/members.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#定义将isrc转换成年份\n",
    "def isrc_to_year(isrc):\n",
    "    if type(isrc) == str:\n",
    "        if int(isrc[5:7]) > 17:#根据分布，最早为1918\n",
    "            return 1900 + int(isrc[5:7])\n",
    "        else:\n",
    "            return 2000 + int(isrc[5:7])\n",
    "    else:\n",
    "        return np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#对isrc进行转化\n",
    "songs_extra = pd.read_csv('./data/song_extra_info.csv')\n",
    "songs_extra['song_year'] = songs_extra['isrc'].apply(isrc_to_year)\n",
    "songs_extra.drop(['isrc', 'name'], axis = 1, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>msno</th>\n",
       "      <th>song_id</th>\n",
       "      <th>source_system_tab</th>\n",
       "      <th>source_screen_name</th>\n",
       "      <th>source_type</th>\n",
       "      <th>target</th>\n",
       "      <th>song_year</th>\n",
       "      <th>city</th>\n",
       "      <th>bd</th>\n",
       "      <th>gender</th>\n",
       "      <th>registered_via</th>\n",
       "      <th>registration_init_time</th>\n",
       "      <th>expiration_date</th>\n",
       "      <th>song_length</th>\n",
       "      <th>genre_ids</th>\n",
       "      <th>artist_name</th>\n",
       "      <th>composer</th>\n",
       "      <th>lyricist</th>\n",
       "      <th>language</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=</td>\n",
       "      <td>BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=</td>\n",
       "      <td>explore</td>\n",
       "      <td>Explore</td>\n",
       "      <td>online-playlist</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>20120102.0</td>\n",
       "      <td>20171005.0</td>\n",
       "      <td>206471.0</td>\n",
       "      <td>359</td>\n",
       "      <td>Bastille</td>\n",
       "      <td>Dan Smith| Mark Crew</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>pouJqjNRmZOnRNzzMWWkamTKkIGHyvhl/jo4HgbncnM=</td>\n",
       "      <td>BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=</td>\n",
       "      <td>discover</td>\n",
       "      <td>Online playlist more</td>\n",
       "      <td>online-playlist</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>male</td>\n",
       "      <td>4.0</td>\n",
       "      <td>20151220.0</td>\n",
       "      <td>20170930.0</td>\n",
       "      <td>206471.0</td>\n",
       "      <td>359</td>\n",
       "      <td>Bastille</td>\n",
       "      <td>Dan Smith| Mark Crew</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>xbodnNBaLMyqqI7uFJlvHOKMJaizuWo/BB/YHZICcKo=</td>\n",
       "      <td>BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=</td>\n",
       "      <td>my library</td>\n",
       "      <td>Local playlist more</td>\n",
       "      <td>local-library</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>20120804.0</td>\n",
       "      <td>20171004.0</td>\n",
       "      <td>206471.0</td>\n",
       "      <td>359</td>\n",
       "      <td>Bastille</td>\n",
       "      <td>Dan Smith| Mark Crew</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>s0ndDsjI79amU0RBiullFN8HRz9HjE++34jGNa7zJ/s=</td>\n",
       "      <td>BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=</td>\n",
       "      <td>my library</td>\n",
       "      <td>Local playlist more</td>\n",
       "      <td>local-library</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>female</td>\n",
       "      <td>9.0</td>\n",
       "      <td>20110808.0</td>\n",
       "      <td>20170917.0</td>\n",
       "      <td>206471.0</td>\n",
       "      <td>359</td>\n",
       "      <td>Bastille</td>\n",
       "      <td>Dan Smith| Mark Crew</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Vw4Umh6/qlsJDC/XMslyAxVvRgFJGHr53yb/nrmY1DU=</td>\n",
       "      <td>BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=</td>\n",
       "      <td>my library</td>\n",
       "      <td>Local playlist more</td>\n",
       "      <td>local-library</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>33.0</td>\n",
       "      <td>female</td>\n",
       "      <td>9.0</td>\n",
       "      <td>20070323.0</td>\n",
       "      <td>20170915.0</td>\n",
       "      <td>206471.0</td>\n",
       "      <td>359</td>\n",
       "      <td>Bastille</td>\n",
       "      <td>Dan Smith| Mark Crew</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           msno  \\\n",
       "0  FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=   \n",
       "1  pouJqjNRmZOnRNzzMWWkamTKkIGHyvhl/jo4HgbncnM=   \n",
       "2  xbodnNBaLMyqqI7uFJlvHOKMJaizuWo/BB/YHZICcKo=   \n",
       "3  s0ndDsjI79amU0RBiullFN8HRz9HjE++34jGNa7zJ/s=   \n",
       "4  Vw4Umh6/qlsJDC/XMslyAxVvRgFJGHr53yb/nrmY1DU=   \n",
       "\n",
       "                                        song_id source_system_tab  \\\n",
       "0  BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=           explore   \n",
       "1  BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=          discover   \n",
       "2  BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=        my library   \n",
       "3  BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=        my library   \n",
       "4  BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=        my library   \n",
       "\n",
       "     source_screen_name      source_type  target  song_year  city    bd  \\\n",
       "0               Explore  online-playlist     1.0     2016.0   1.0   0.0   \n",
       "1  Online playlist more  online-playlist     0.0     2016.0  15.0  18.0   \n",
       "2   Local playlist more    local-library     1.0     2016.0   1.0   0.0   \n",
       "3   Local playlist more    local-library     1.0     2016.0   5.0  21.0   \n",
       "4   Local playlist more    local-library     0.0     2016.0   6.0  33.0   \n",
       "\n",
       "   gender  registered_via  registration_init_time  expiration_date  \\\n",
       "0     NaN             7.0              20120102.0       20171005.0   \n",
       "1    male             4.0              20151220.0       20170930.0   \n",
       "2     NaN             7.0              20120804.0       20171004.0   \n",
       "3  female             9.0              20110808.0       20170917.0   \n",
       "4  female             9.0              20070323.0       20170915.0   \n",
       "\n",
       "   song_length genre_ids artist_name              composer lyricist  language  \n",
       "0     206471.0       359    Bastille  Dan Smith| Mark Crew      NaN      52.0  \n",
       "1     206471.0       359    Bastille  Dan Smith| Mark Crew      NaN      52.0  \n",
       "2     206471.0       359    Bastille  Dan Smith| Mark Crew      NaN      52.0  \n",
       "3     206471.0       359    Bastille  Dan Smith| Mark Crew      NaN      52.0  \n",
       "4     206471.0       359    Bastille  Dan Smith| Mark Crew      NaN      52.0  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#以下为对训练数据进行合并处理\n",
    "train = df_train.merge(songs_extra, on = 'song_id', how = 'left')\n",
    "train['target'] = train['target'].astype(np.int8)\n",
    "df_train_members = pd.merge(train, df_members, on='msno', how='inner')\n",
    "df_train_merged = pd.merge(df_train_members, df_songs, on='song_id', how='outer')\n",
    "#df_train_merged.drop(['registration_init_time','expiration_date'],axis=1,inplace=True)\n",
    "df_train_merged.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "观察到有缺失值存在。\n",
    "以下为数据探索部分，主要为数据分布的大致了解，缺失值和异常值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9313824\n"
     ]
    }
   ],
   "source": [
    "print(df_train_merged.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "msno                      1936406\n",
      "song_id                         0\n",
      "source_system_tab         1954777\n",
      "source_screen_name        2351210\n",
      "source_type               1957945\n",
      "target                    1936406\n",
      "song_year                 2514264\n",
      "city                      1936406\n",
      "bd                        1936406\n",
      "gender                    4897885\n",
      "registered_via            1936406\n",
      "registration_init_time    1936406\n",
      "expiration_date           1936406\n",
      "song_length                   114\n",
      "genre_ids                  205338\n",
      "artist_name                   114\n",
      "composer                  2591552\n",
      "lyricist                  4855333\n",
      "language                      150\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(df_train_merged.isnull().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Template (disabled): a column with many missing values can get an extra\n",
    "# missing-indicator feature; the column names below are still left blank.\n",
    "#df_train_merged[''] = df_train_merged[''].apply(lambda x: 1 if pd.isnull(x) else 0)\n",
    "#df_train_merged[['','']].head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0,0.5,'Number of occurrences')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaEAAAEKCAYAAAC7c+rvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAG+ZJREFUeJzt3X+wVOWd5/H3R/AHm0RBvVoEZCDJ\nzW6ISYgSQ5LdxKij6NSIcXUGKxsoh1qSLCamNpNSszMhaszojIm77iRkmZUV3IxInBhJBocw+COb\nmYhgRBR/FDdo5AZWUBAxbnDB7/5xnt4crn27T1/ofgj9eVWd6nO+5/nVVRe+dc55+jmKCMzMzHI4\nLPcAzMysezkJmZlZNk5CZmaWjZOQmZll4yRkZmbZOAmZmVk2TkJmZpaNk5CZmWXjJGRmZtkMzz2A\ng93xxx8f48ePzz0MM7PfKQ8//PALEdHTrJyTUBPjx49nzZo1uYdhZvY7RdIvq5Tz7TgzM8vGScjM\nzLJxEjIzs2ychMzMLBsnITMzy8ZJyMzMsnESMjOzbJyEzMwsGychMzPLxismmHWx5655T+4h2EFo\n3Fce61hfvhIyM7NsnITMzCwb347rgFO/tCj3EOwg9PBfzcg9BLPsfCVkZmbZOAmZmVk2TkJmZpaN\nk5CZmWXTtiQk6ShJD0l6VNJ6SVen+K2SnpG0Nm2TUlySbpbUJ2mdpFNKbc2UtCFtM0vxUyU9lurc\nLEkpfqykFan8CkmjmvVhZmad184rod3AGRHxPmASMFXSlHTuSxExKW1rU+xcoDdts4F5UCQUYC7w\nQeA0YG4tqaQys0v1pqb4lcDKiOgFVqbjQfswM7M82paEovBKOjw8bdGgyjRgUar3IDBS0mjgHGBF\nRGyPiB3ACoqENho4OiJ+FhEBLAIuKLW1MO0vHBCv14eZmWXQ1mdCkoZJWgtspUgkq9Kp69LtsJsk\nHZliY4BNper9KdYo3l8nDnBiRGwBSJ8nNOnDzMwyaGsSioi9ETEJGAucJulk4CrgXwEfAI4FrkjF\nVa+JIcQbqVRH0mxJaySt2bZtW5MmzcxsqDoyOy4iXgLuB6ZGxJZ0O2w38D8onvNAcVVyUqnaWGBz\nk/jYOnGA52u32dLn1iZ9DBzv/IiYHBGTe3p6Wvy2ZmZWVTtnx/VIGpn2RwBnAU+VkoMontU8nqos\nBWakGWxTgJ3pVtpy4GxJo9KEhLOB5encLklTUlszgLtLbdVm0c0cEK/Xh5mZZdDOteNGAwslDaNI\ndksi4keS7pXUQ3FrbC3wmVR+GXAe0Ae8ClwKEBHbJV0LrE7lromI7Wn/s8CtwAjgnrQBXA8skTQL\neA64uFEfZmaWR9uSUESsA95fJ37GIOUDmDPIuQXAgjrxNcDJdeIvAme20oeZmXWeV0wwM7NsnITM\nzCwbJyEzM8vGScjMzLJxEjIzs2ychMzMLBsnITMzy8ZJyMzMsnESMjOzbJyEzMwsGychMzPLxknI\nzMyycRIyM7NsnITMzCwbJyEzM8vGScjMzLJxEjIzs2ychMzMLBsnITMzy6ZtSUjSUZIekvSopPWS\nrk7xCZJWSdog6Q5JR6T4kem4L50fX2rrqhR/WtI5pfjUFOuTdGUp3nIfZmbWee28EtoNnBER7wMm\nAVMlTQFuAG6KiF5gBzArlZ8F7IiIdwA3pXJImghMB94NTAW+LWmYpGHAt4BzgYnAJaksrfZhZmZ5\ntC0JReGVdHh42gI4A7gzxRcCF6T9aemYdP5MSUrxxRGxOyKeAfqA09LWFxEbI+I1YDEwLdVptQ8z\nM8ugrc+E0hXLWmArsAL4BfBSROxJRfqBMWl/DLAJIJ3fCRxXjg+oM1j8uCH0YWZmGbQ1CUXE3oiY\nBIyluHJ5V71i6bPeFUkcwHijPvYhab
akNZLWbNu2rU4VMzM7EJomIUlvknRY2n+npPMlHd5KJxHx\nEnA/MAUYKWl4OjUW2Jz2+4GTUj/DgWOA7eX4gDqDxV8YQh8Dxzs/IiZHxOSenp5WvqqZmbWgypXQ\nT4CjJI0BVgKXArc2qySpR9LItD8COAt4ErgPuCgVmwncnfaXpmPS+XsjIlJ8eprZNgHoBR4CVgO9\naSbcERSTF5amOq32YWZmGQxvXgRFxKuSZgH/NSL+UtIjFeqNBhamWWyHAUsi4keSngAWS/oa8Ahw\nSyp/C3CbpD6Kq5PpABGxXtIS4AlgDzAnIvYCSLoMWA4MAxZExPrU1hWt9GFmZnlUSkKSPgR8kt9O\ndW5aLyLWAe+vE99I8XxoYPw3wMWDtHUdcF2d+DJg2YHow8zMOq/K7bgvAFcBd6WrkrdR3O4yMzPb\nL1WuaB4AHpD0pnS8Efh8uwdmZmaHviqz4z6UnuM8mY7fJ+nbbR+ZmZkd8qrcjvvPwDnAiwAR8Sjw\n0XYOyszMukOlH6tGxKYBob1tGIuZmXWZKrPjNkn6MBDp9zifJ92aMzMz2x9VroQ+A8yhWHetn2JF\n7DntHJSZmXWHKrPjXqD4jZCZmdkBVWV23MLa8jvpeJSkBe0dlpmZdYMqt+PemxYgBSAidlBnJQQz\nM7NWVUlCh0kaVTuQdCzVJjSYmZk1VCWZfAP4Z0m1N5VeTJ113MzMzFpVZWLCIkkPAx+neCnchRHx\nRNtHZmZmh7yqt9WeAnbUyksaFxHPtW1UZmbWFZomIUmfA+YCz1OslCCKV2K/t71DMzOzQ12VK6HL\ngX8ZES+2ezBmZtZdqsyO2wTsbPdAzMys+1S5EtoI3C/p74HdtWBEfLNtozIzs65QJQk9l7Yj0mZm\nZnZANL0dFxFXR8TVwI21/XTckKSTJN0n6UlJ6yVdnuJflfQrSWvTdl6pzlWS+iQ9LemcUnxqivVJ\nurIUnyBplaQNku5Iq3wj6ch03JfOj2/Wh5mZdV4736y6B/hiRLwLmALMkTQxnbspIialbVlqdyIw\nHXg3MBX4tqRhkoYB3wLOBSYCl5TauSG11UsxhXxWis8CdkTEO4CbUrlB+6jwXczMrA3a9mbViNgS\nET9P+7soktiYBlWmAYsjYndEPAP0AaelrS8iNkbEa8BiYJokAWcAtZUcFgIXlNpamPbvBM5M5Qfr\nw8zMMujIm1XT7bD3A6tS6DJJ6yQtKK1LN4ZiJl5Nf4oNFj8OeCki9gyI79NWOr8zlR+srYHjnS1p\njaQ127Zta+WrmplZCypN0S6/WVXSn9LCm1UlvRn4O+ALEfEyMA94O8XL8bZQrE0HxY9gB4ohxIfS\n1r6BiPkRMTkiJvf09NSpYmZmB0Jb36wq6XCKBPTdiPg+QEQ8HxF7I+J14G/47e2wfuCkUvWxwOYG\n8ReAkZKGD4jv01Y6fwywvUFbZmaWQcMklB7afyoiPhkRJ0bECRHx76qsnpCewdwCPFn+TZGk0aVi\nnwAeT/tLgelpZtsEoBd4CFgN9KaZcEdQTCxYGhEB3AdclOrPBO4utTUz7V8E3JvKD9aHmZll0PB3\nQhGxV9I0ihlmrfoI8CngMUlrU+zLFLPbJlHcBnsW+HTqa72kJcATFDPr5kTEXgBJlwHLgWHAgohY\nn9q7Algs6WvAIxRJj/R5m6Q+iiug6c36MDOzzqvyY9V/kvTXwB3Ar2vB2sy3wUTET6n/DGZZgzrX\nUeddRWka9xvqRcRG6sxui4jfULz3qHIfZmbWeVWS0IfT5zWlWFBMjzYzMxuyhklI0mHAvIhY0qHx\nmJlZF2k4MSHNYLusQ2MxM7MuU2WK9gpJf5rWgju2trV9ZGZmdsir8kzoT9Jn+bdBAbztwA/HzMy6\nSdMkFBETOjEQMzPrPk2TkKQZ9eIRsejAD8fMzLpJldtxHyjtHwWcCfwccBIyM7P9UuV23OfKx5KO\nAW
5r24jMzKxrVHqVwwCvUqy5ZmZmtl+qPBP6Ib993cFhFG839Y9Xzcxsv1V5JnRjaX8P8MuI6G/T\neMzMrItUSULPAVvSoqBIGiFpfEQ829aRmZnZIa/KM6HvAa+XjvemmJmZ2X6pkoSGR8RrtYO0f0T7\nhmRmZt2iShLaJun82kF6yd0L7RuSmZl1iyrPhD4DfDe92A6gH6i7ioKZmVkrqvxY9RfAFElvBhQR\nu9o/LDMz6wZNb8dJ+rqkkRHxSkTskjRK0tc6MTgzMzu0VXkmdG5EvFQ7iIgdwHnNKqX3D90n6UlJ\n6yVdnuLHSlohaUP6HJXiknSzpD5J6ySdUmprZiq/QdLMUvxUSY+lOjdL0lD7MDOzzquShIZJOrJ2\nIGkEcGSD8jV7gC9GxLuAKcAcSROBK4GVEdELrEzHAOdSLAfUC8wG5qX+jgXmAh8ETgPm1pJKKjO7\nVG9qirfUh5mZ5VElCf1PYKWkWZL+BFgBLGxWKSK2RMTP0/4u4ElgDDCtVH8hcEHanwYsisKDwEhJ\no4FzgBURsT1dha0ApqZzR0fEzyIiKFb1LrfVSh9mZpZBlYkJfylpHXBWCl0bEctb6UTSeOD9wCrg\nxIjYktreIumEVGwMsKlUrT/FGsX768QZQh9bBox3NsWVEuPGjWvlq5qZWQuqTNEGeAQ4nGIh00da\n6SDNqvs74AsR8XJ6bFO3aJ1YDCHecDhV6kTEfGA+wOTJk5u1aWZmQ1RldtwfAQ8BFwF/BKySdFGV\nxiUdTpGAvhsR30/h52u3wNLn1hTvB04qVR8LbG4SH1snPpQ+zMwsgyrPhP4T8IGImBkRMygmB/x5\ns0ppptotwJMR8c3SqaVAbYbbTODuUnxGmsE2BdiZbqktB85OU8NHAWcDy9O5XZKmpL5mDGirlT7M\nzCyDKrfjDouIraXjF6mWvD4CfAp4TNLaFPsycD2wRNIsihW6L07nllFM/e6jeHHepQARsV3StcDq\nVO6aiNie9j8L3AqMAO5JG632YWZmeVRJQv8gaTlwezr+Y4r/zBuKiJ9S/xkMwJl1ygcwZ5C2FgAL\n6sTXACfXib/Yah9mZtZ5VWbHfUnShcC/pkgq8yPirraPzMzMDnmVZselSQXfb1rQzMysBVWe7ZiZ\nmbWFk5CZmWUzaBKStDJ93tC54ZiZWTdp9ExotKSPAedLWsyAmW61deHMzMyGqlES+grF6tNjgW8O\nOBfAGe0alJmZdYdBk1BE3AncKenPI+LaDo7JzMy6RJXfCV0r6Xzgoyl0f0T8qL3DMjOzblBlAdO/\nAC4Hnkjb5SlmZma2X6r8WPUPgEkR8TqApIUUr3O4qp0DMzOzQ1/V3wmNLO0f046BmJlZ96lyJfQX\nwCOS7qOYpv1RfBVkZmYHQJWJCbdLuh/4AEUSuiIi/ne7B2ZmZoe+qguYbqF4IZyZmdkB47XjzMws\nGychMzPLpmESknSYpMc7NRgzM+suDZNQ+m3Qo5LGdWg8ZmbWRarcjhsNrJe0UtLS2taskqQFkraW\nr6QkfVXSryStTdt5pXNXSeqT9LSkc0rxqSnWJ+nKUnyCpFWSNki6Q9IRKX5kOu5L58c368PMzPKo\nMjvu6iG2fSvw18CiAfGbIuLGckDSRGA68G7grcA/SnpnOv0t4PeBfmC1pKUR8QRwQ2prsaTvALOA\neelzR0S8Q9L0VO6PB+sjIvYO8fuZmdl+anolFBEPAM8Ch6f91UDTdwlFxE+A7RXHMQ1YHBG7I+IZ\noA84LW19EbExIl4DFgPTJIniVRJ3pvoLgQtKbS1M+3cCZ6byg/VhZmaZVFnA9N9T/Gf+31JoDPCD\n/ejzMknr0u26UaU2N5XK9KfYYPHjgJciYs+A+D5tpfM7U/nB2noDSbMlrZG0Ztu2bUP7lmZm1lSV\nZ0JzgI8ALwNExAbghCH2Nw94OzAJ2AJ8I8VVp2wMIT6Utt4YjJgf
EZMjYnJPT0+9ImZmdgBUSUK7\n060wACQNZ5D/vJuJiOcjYm+adfc3/PZ2WD9wUqnoWGBzg/gLwMg0lnJ8n7bS+WMobgsO1paZmWVS\nJQk9IOnLwAhJvw98D/jhUDqTNLp0+AmgNnNuKTA9zWybAPQCD1E8f+pNM+GOoJhYsDQiArgPuCjV\nnwncXWprZtq/CLg3lR+sDzMzy6TK7LgrKWacPQZ8GlgG/PdmlSTdDpwOHC+pH5gLnC5pEsWV1LOp\nPSJivaQlFC/N2wPMqc1ak3QZsBwYBiyIiPWpiyuAxZK+RvF+o1tS/BbgNkl9FFdA05v1YWZmeVRZ\nRfv19CK7VRTJ4+l0ZdGs3iV1wrfUidXKXwdcVye+jCLxDYxvpM7stoj4DXBxK32YmVkeTZOQpD8A\nvgP8guLh/gRJn46Ie9o9ODMzO7RVuR33DeDjEdEHIOntwN8DTkJmZrZfqkxM2FpLQMlGYGubxmNm\nZl1k0CshSRem3fWSlgFLKJ4JXUwxa83MzGy/NLod94el/eeBj6X9bcCoNxY3MzNrzaBJKCIu7eRA\nzMys+1SZHTcB+Bwwvlw+Is5v37DMzKwbVJkd9wOK3/f8EHi9vcMxM7NuUiUJ/SYibm77SMzMrOtU\nSUL/RdJc4MfA7lowIpq+U8jMzKyRKknoPcCnKF4iV7sdF+nYzMxsyKokoU8Abyu/zsHMzOxAqLJi\nwqPAyHYPxMzMuk+VK6ETgackrWbfZ0Keom1mZvulShKa2/ZRmJlZV6ryPqEHOjEQMzPrPlVWTNhF\nMRsO4AjgcODXEXF0OwdmZmaHvipXQm8pH0u6gDpvNDUzM2tVldlx+4iIH1DhN0KSFkjaKunxUuxY\nSSskbUifo1Jckm6W1CdpnaRTSnVmpvIbJM0sxU+V9Fiqc7MkDbUPMzPLo2kSknRhabtI0vX89vZc\nI7cCUwfErgRWRkQvsDIdA5wL9KZtNjAv9X0sxcSID1Jcfc2tJZVUZnap3tSh9GFmZvlUuRL6w9J2\nDrALmNasUkT8BNg+IDwNWJj2FwIXlOKLovAgMFLS6NTfiojYHhE7gBXA1HTu6Ij4WUQEsGhAW630\nYWZmmVR5JnQg3yt0YkRsSe1ukXRCio8BNpXK9adYo3h/nfhQ+tiyv1/KzMyGptHrvb/SoF5ExLUH\ncByq18cQ4kPp440FpdkUt+wYN25ck2bNzGyoGt2O+3WdDWAWcMUQ+3u+dgssfW5N8X7gpFK5scDm\nJvGxdeJD6eMNImJ+REyOiMk9PT0tfUEzM6tu0CQUEd+obcB8YARwKbAYeNsQ+1sK1Ga4zQTuLsVn\npBlsU4Cd6ZbacuBsSaPShISzgeXp3C5JU9KsuBkD2mqlDzMzy6ThM6E0O+0/Ap+keMh/Spog0JSk\n24HTgeMl9VPMcrseWCJpFvAccHEqvgw4D+gDXqVIdkTEdknXAqtTuWsiojbZ4bMUM/BGAPekjVb7\nMDOzfBo9E/or4EKKq6D3RMQrrTQcEZcMcurMOmUDmDNIOwuABXXia4CT68RfbLUPMzPLo9EzoS8C\nbwX+DNgs6eW07ZL0cmeGZ2Zmh7JBr4QiouXVFMzMzFrhRGNmZtk4CZmZWTZOQmZmlo2TkJmZZeMk\nZGZm2TgJmZlZNk5CZmaWjZOQmZll4yRkZmbZOAmZmVk2TkJmZpaNk5CZmWXjJGRmZtk4CZmZWTZO\nQmZmlo2TkJmZZeMkZGZm2WRJQpKelfSYpLWS1qTYsZJWSNqQPkeluCTdLKlP0jpJp5TamZnKb5A0\nsxQ/NbXfl+qqUR9mZpZHziuhj0fEpIiYnI6vBFZGRC+wMh0DnAv0pm02MA+KhALMBT4InAbMLSWV\nealsrd7UJn2YmVkGB9PtuGnAwrS/ELigFF8UhQeBkZJGA+cAKyJie0TsAFYAU9O5oyPiZxERwKIB\nbdXrw8zMMsiVhAL4saSHJc1O
sRMjYgtA+jwhxccAm0p1+1OsUby/TrxRH2ZmlsHwTP1+JCI2SzoB\nWCHpqQZlVScWQ4hXlhLjbIBx48a1UtXMzFqQ5UooIjanz63AXRTPdJ5Pt9JIn1tT8X7gpFL1scDm\nJvGxdeI06GPg+OZHxOSImNzT0zPUr2lmZk10PAlJepOkt9T2gbOBx4GlQG2G20zg7rS/FJiRZslN\nAXamW2nLgbMljUoTEs4GlqdzuyRNSbPiZgxoq14fZmaWQY7bcScCd6VZ08OBv42If5C0GlgiaRbw\nHHBxKr8MOA/oA14FLgWIiO2SrgVWp3LXRMT2tP9Z4FZgBHBP2gCuH6QPMzPLoONJKCI2Au+rE38R\nOLNOPIA5g7S1AFhQJ74GOLlqH2ZmlsfBNEXbzMy6jJOQmZll4yRkZmbZOAmZmVk2TkJmZpaNk5CZ\nmWXjJGRmZtk4CZmZWTZOQmZmlo2TkJmZZeMkZGZm2TgJmZlZNk5CZmaWjZOQmZll4yRkZmbZOAmZ\nmVk2TkJmZpaNk5CZmWXjJGRmZtl0ZRKSNFXS05L6JF2ZezxmZt2q65KQpGHAt4BzgYnAJZIm5h2V\nmVl36rokBJwG9EXExoh4DVgMTMs8JjOzrtSNSWgMsKl03J9iZmbWYcNzDyAD1YnFPgWk2cDsdPiK\npKfbPqrucTzwQu5BHAx048zcQ7B9+W+zZm69/yZb9ntVCnVjEuoHTiodjwU2lwtExHxgficH1S0k\nrYmIybnHYTaQ/zbz6MbbcauBXkkTJB0BTAeWZh6TmVlX6roroYjYI+kyYDkwDFgQEeszD8vMrCt1\nXRICiIhlwLLc4+hSvs1pByv/bWagiGheyszMrA268ZmQmZkdJJyErC2aLY0k6UhJd6TzqySN7/wo\nrdtIWiBpq6THBzkvSTenv8t1kk7p9Bi7jZOQHXAVl0aaBeyIiHcANwE3dHaU1qVuBaY2OH8u0Ju2\n2cC8DoypqzkJWTtUWRppGrAw7d8JnCnpgPxCzmwwEfETYHuDItOARVF4EBgpaXRnRtednISsHaos\njfT/y0TEHmAncFxHRmc2OC/r1WFOQtYOTZdGqljGrNP8d9lhTkLWDk2XRiqXkTQcOIbGt0nMOqHK\n364dQE5C1g5VlkZaCtRW8LwIuDf8ozXLbykwI82SmwLsjIgtuQd1KOvKFROsvQZbGknSNcCaiFgK\n3ALcJqmP4gpoer4RW7eQdDtwOnC8pH5gLnA4QER8h2IllfOAPuBV4NI8I+0eXjHBzMyy8e04MzPL\nxknIzMyycRIyM7NsnITMzCwbJyEzM8vGScgsM0kjJf2HDvRzuqQPt7sfs1Y4CZnlNxKonITSDymH\n8m/3dMBJyA4q/p2QWWaSaquMPw3cB7wXGEXxI8o/i4i70/uW7knnPwRcAJwFXEGxrMwGYHdEXCap\nB/gOMC518QXgV8CDwF5gG/C5iPhfnfh+Zo04CZlllhLMjyLi5LSO3r+IiJclHU+ROHqB3wM2Ah+O\niAclvRX4Z+AUYBdwL/BoSkJ/C3w7In4qaRywPCLeJemrwCsRcWOnv6PZYLxsj9nBRcDXJX0UeJ3i\nNQInpnO/TO+4geKdTQ9ExHYASd8D3pnOnQVMLL2e6WhJb+nE4M1a5SRkdnD5JNADnBoR/1fSs8BR\n6dyvS+UavQDwMOBDEfF/ykG/M9AORp6YYJbfLqB2pXIMsDUloI9T3Iar5yHgY5JGpVt4/7Z07sfA\nZbUDSZPq9GN2UHASMsssIl4E/knS48AkYLKkNRRXRU8NUudXwNeBVcA/Ak9QvJ0W4POpjXWSngA+\nk+I/BD4haa2kf9O2L2TWAk9MMPsdJenNEfFKuhK6i+KVGXflHpdZK3wlZPa766uS1gKPA88AP8g8\nHrOW+UrIzMyy8ZWQmZll4yRkZmbZOAmZmVk2TkJmZpaNk5CZmWXjJGRmZtn8PzZ12NOakmAzAA
AA\nAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x24442de8be0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(df_train_merged['target'])\n",
    "plt.xlabel('target')\n",
    "plt.ylabel('Number of occurrences')\n",
    "#此处发现分布大致相等"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    6.799560e+06\n",
       "mean     2.011230e+03\n",
       "std      6.682440e+00\n",
       "min      1.918000e+03\n",
       "25%      2.009000e+03\n",
       "50%      2.014000e+03\n",
       "75%      2.016000e+03\n",
       "max      2.017000e+03\n",
       "Name: song_year, dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#以下为观察歌曲年份的影响\n",
    "df_train_merged['song_year'].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "由上可以大致看出：年份在 2009 年及以后的歌曲至少占 75%，年份在 2016 到 2017 年的歌曲至少占 25%。该特征为数值型特征，可绘制直方图大致观察其分布。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\scipy\\stats\\stats.py:1633: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n",
      "  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEOCAYAAABM5Pr8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAGXpJREFUeJzt3XuwXWWd5vHvQyJeUK4GhiZEsI0X\ntBQ1A5lBpxW6ITBVE6zWKbBHMspULAemdIpuxW5LvHap1S1KtTqDkjI4KjKoRXomNmaQHssLl6AI\nRNQcUOEIzcVExHbUBn/zx3pPZ3Pc57pC9jnw/VTt2mv/1rvetd4sPI/rstdOVSFJUh97jXoDJEmL\nn2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk3mYMkyRPSHJtku8k2Zbkna1+ZJJrkmxP8rkke7f649vn\nsTb/iIG+3trq309y0kB9TauNJTl3oD7ndUiS9rzZHJn8Gji+ql4AHA2sSbIaeD9wflWtBHYCZ7b2\nZwI7q+oZwPmtHUmOAk4DngusAT6aZEmSJcBHgJOBo4DTW1vmug5J0mjMGCbV+UX7+Lj2KuB44LJW\n3wic2qbXts+0+SckSatfUlW/rqofAmPAMe01VlW3VdVvgEuAtW2Zua5DkjQCs7pm0o4gbgDuAbYA\ntwI/q6oHW5Nx4LA2fRhwB0Cbfz9w0GB90jJT1Q+axzokSSOwdDaNquoh4Ogk+wNfBJ4zrFl7H3aE\nUNPUhwXadO2nW8fDJFkPrAfYZ599XvzsZz97yGKSpKlcf/3191XVspnazSpMJlTVz5L8PbAa2D/J\n0nZksBy4szUbBw4HxpMsBfYDdgzUJwwuM6x+3zzWMXl7LwQuBFi1alVt3bp1LsOVpMe8JD+eTbvZ\n3M21rB2RkOSJwB8CtwBXAa9szdYBl7fpTe0zbf5Xqnua5CbgtHYn1pHASuBa4DpgZbtza2+6i/Sb\n2jJzXYckaQRmc2RyKLCx3XW1F3BpVf2vJN8FLknyHuDbwEWt/UXAp5KM0R0tnAZQVduSXAp8F3gQ\nOKudPiPJ2cAVwBJgQ1Vta329ZS7rkCSNRh4r/4fe01ySNHdJrq+qVTO18xvwkqTeDBNJUm+GiSSp\nN8NEktSbYSJJ6s0wkST1NqdvwEuS5ucz19w+tP7qY1fs4S15ZHhkIknqzTCRJPVmmEiSejNMJEm9\nGSaSpN4ME0lSb4aJJKk3w0SS1JthIknqzTCRJPVmmEiSejNMJEm9GSaSpN4ME0lSb4aJJKk3w0SS\n1JthIknqzTCRJPVmmEiSejNMJEm9zRgmSQ5PclWSW5JsS/LGVn9Hkp8kuaG9ThlY5q1JxpJ8P8lJ\nA/U1rTaW5NyB+pFJrkmyPcnnkuzd6o9vn8fa/CNmWockac+bzZHJg8A5VfUcYDVwVpKj2rzzq+ro\n9toM0OadBjwXWAN8NMmSJEuAjwAnA0cBpw/08/7W10pgJ3Bmq58J7KyqZwDnt3ZTrmPe/wqSpF5m\nDJOququqvtWmHwBuAQ6bZpG1wCVV9euq+iEwBhzTXmNVdVtV/Qa4BFibJMDxwGVt+Y3AqQN9bWzT\nlwEntPZTrUOSNAJL59K4nWZ6IXANcBxwdpIzgK10Ry876YLm6oHFxtkVPndMqh8LHAT8rKoeHNL+\nsIllqurBJPe39tOtQ5Iekz5zze1D668+dsUjvu5ZX4BP8mTg88CbqurnwMeA3weOBu4C/nqi6ZDF\nax71+fQ1eZvXJ9maZOu99947ZBFJ0u4wqzBJ8ji6IPl0VX0BoKrurqqHquq3wMfZdZppHDh8YPHl\nwJ3T1O8D9k+ydFL9YX21+fsBO6bp62Gq6sKqWlVVq5YtWzaboUqS5mE2d3MFuAi4pao+OFA/dKDZ\nK4Cb2/Qm4LR2J9aRwErgWuA6YGW7c2
tvugvom6qqgKuAV7bl1wGXD/S1rk2/EvhKaz/VOiRJIzCb\naybHAa8BbkpyQ6v9Od3dWEfTnV76EfB6gKraluRS4Lt0d4KdVVUPASQ5G7gCWAJsqKptrb+3AJck\neQ/wbbrwor1/KskY3RHJaTOtQ5K0580YJlX1NYZfo9g8zTLvBd47pL552HJVdRtD7saqql8Br5rL\nOiRJe57fgJck9WaYSJJ6M0wkSb0ZJpKk3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M0wk\nSb0ZJpKk3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk3gwTSVJvhokkqTfD\nRJLUm2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk3mYMkySHJ7kqyS1JtiV5Y6sfmGRLku3t/YBWT5IL\nkowluTHJiwb6Wtfab0+ybqD+4iQ3tWUuSJL5rkOStOfN5sjkQeCcqnoOsBo4K8lRwLnAlVW1Eriy\nfQY4GVjZXuuBj0EXDMB5wLHAMcB5E+HQ2qwfWG5Nq89pHZKk0ZgxTKrqrqr6Vpt+ALgFOAxYC2xs\nzTYCp7bptcDF1bka2D/JocBJwJaq2lFVO4EtwJo2b9+q+mZVFXDxpL7msg5J0gjM6ZpJkiOAFwLX\nAIdU1V3QBQ5wcGt2GHDHwGLjrTZdfXxInXmsQ5I0ArMOkyRPBj4PvKmqfj5d0yG1mkd92s2ZzTJJ\n1ifZmmTrvffeO0OXkqT5mlWYJHkcXZB8uqq+0Mp3T5xaau/3tPo4cPjA4suBO2eoLx9Sn886Hqaq\nLqyqVVW1atmyZbMZqiRpHmZzN1eAi4BbquqDA7M2ARN3ZK0DLh+on9HuuFoN3N9OUV0BnJjkgHbh\n/UTgijbvgSSr27rOmNTXXNYhSRqBpbNocxzwGuCmJDe02p8D7wMuTXImcDvwqjZvM3AKMAb8Engt\nQFXtSPJu4LrW7l1VtaNNvwH4JPBE4EvtxVzXIUkajRnDpKq+xvBrFAAnDGlfwFlT9LUB2DCkvhV4\n3pD6T+e6DknSnuc34CVJvRkmkqTeDBNJUm+GiSSpN8NEktSbYSJJ6s0wkST1ZphIknozTCRJvRkm\nkqTeDBNJUm+GiSSpN8NEktSbYSJJ6s0wkST1ZphIknozTCRJvRkmkqTeDBNJUm+GiSSpN8NEktSb\nYSJJ6s0wkST1ZphIknozTCRJvRkmkqTeDBNJUm8zhkmSDUnuSXLzQO0dSX6S5Ib2OmVg3luTjCX5\nfpKTBuprWm0sybkD9SOTXJNke5LPJdm71R/fPo+1+UfMtA5J0mjM5sjkk8CaIfXzq+ro9toMkOQo\n4DTguW2ZjyZZkmQJ8BHgZOAo4PTWFuD9ra+VwE7gzFY/E9hZVc8Azm/tplzH3IYtSdqdZgyTqvoq\nsGOW/a0FLqmqX1fVD4Ex4Jj2Gquq26rqN8AlwNokAY4HLmvLbwROHehrY5u+DDihtZ9qHZKkEelz\nzeTsJDe202AHtNphwB0DbcZbbar6QcDPqurBSfWH9dXm39/aT9WXJGlE5hsmHwN+HzgauAv461bP\nkLY1j/p8+vodSdYn2Zpk67333jusiSRpN5hXmFTV3VX1UFX9Fvg4u04zjQOHDzRdDtw5Tf0+YP8k\nSyfVH9ZXm78f3em2qfoatp0XVtWqqlq1bNmy+QxVkjQL8wqTJIcOfHwFMHGn1ybgtHYn1pHASuBa\n4DpgZbtza2+6C+ibqqqAq4BXtuXXAZcP9LWuTb8S+EprP9U6JEkjsnSmBkk+C7wMeGqSceA84GVJ\njqY7vfQj4PUAVbUtyaXAd4EHgbOq6qHWz9nAFcASYENVbWureAtwSZL3AN8GLmr1i4BPJRmjOyI5\nbaZ1SJJGY8YwqarTh5QvGlKbaP9e4L1D6puBzUPqtzHkbqyq+hXwqrmsQ5I0Gn4DXpLUm2EiSerN\nMJ
Ek9WaYSJJ6M0wkSb0ZJpKk3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk\n3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk3gwTSVJvhokkqTfDRJLUm2Ei\nSerNMJEk9TZjmCTZkOSeJDcP1A5MsiXJ9vZ+QKsnyQVJxpLcmORFA8usa+23J1k3UH9xkpvaMhck\nyXzXIUkajdkcmXwSWDOpdi5wZVWtBK5snwFOBla213rgY9AFA3AecCxwDHDeRDi0NusHllszn3VI\nkkZnxjCpqq8COyaV1wIb2/RG4NSB+sXVuRrYP8mhwEnAlqraUVU7gS3AmjZv36r6ZlUVcPGkvuay\nDknSiMz3mskhVXUXQHs/uNUPA+4YaDfeatPVx4fU57MOSdKILN3N/WVIreZRn886frdhsp7uVBgr\nVqyYoVtJ6u8z19w+6k0Yifkemdw9cWqpvd/T6uPA4QPtlgN3zlBfPqQ+n3X8jqq6sKpWVdWqZcuW\nzWmAkqTZm2+YbAIm7shaB1w+UD+j3XG1Gri/naK6AjgxyQHtwvuJwBVt3gNJVre7uM6Y1Ndc1iFJ\nGpEZT3Ml+SzwMuCpScbp7sp6H3BpkjOB24FXteabgVOAMeCXwGsBqmpHkncD17V276qqiYv6b6C7\nY+yJwJfai7muQ5I0OjOGSVWdPsWsE4a0LeCsKfrZAGwYUt8KPG9I/adzXYckaTT8BrwkqTfDRJLU\nm2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M0wk\nSb0ZJpKk3nb3b8BL0qPKY/U33efKIxNJUm+GiSSpN8NEktSbYSJJ6s0wkST1ZphIknozTCRJvRkm\nkqTeDBNJUm+GiSSpN8NEktSbYSJJ6q1XmCT5UZKbktyQZGurHZhkS5Lt7f2AVk+SC5KMJbkxyYsG\n+lnX2m9Psm6g/uLW/1hbNtOtQ5I0GrvjyOTlVXV0Va1qn88FrqyqlcCV7TPAycDK9loPfAy6YADO\nA44FjgHOGwiHj7W2E8utmWEdkqQReCROc60FNrbpjcCpA/WLq3M1sH+SQ4GTgC1VtaOqdgJbgDVt\n3r5V9c2qKuDiSX0NW4ckaQT6hkkBX05yfZL1rXZIVd0F0N4PbvXDgDsGlh1vtenq40Pq061DkjQC\nfX8c67iqujPJwcCWJN+bpm2G1Goe9VlrAbceYMWKFXNZVJI0B72OTKrqzvZ+D/BFumsed7dTVLT3\ne1rzceDwgcWXA3fOUF8+pM4065i8fRdW1aqqWrVs2bL5DlOSNIN5h0mSfZI8ZWIaOBG4GdgETNyR\ntQ64vE1vAs5od3WtBu5vp6iuAE5MckC78H4icEWb90CS1e0urjMm9TVsHZKkEehzmusQ4Ivtbt2l\nwGeq6u+SXAdcmuRM4HbgVa39ZuAUYAz4JfBagKrakeTdwHWt3buqakebfgPwSeCJwJfaC+B9U6xD\nkjQC8w6TqroNeMGQ+k+BE4bUCzhrir42ABuG1LcCz5vtOiRJo+E34CVJvRkmkqTeDBNJUm+GiSSp\nN8NEktSbYSJJ6q3v41Qk6VHhM9fcPupNWNQ8MpEk9WaYSJJ6M0wkSb0ZJpKk3rwAL+kxxQvtjwyP\nTCRJvRkmkqTeDBNJUm9eM5H0qOS1kT3LIxNJUm8emUha1DwCWRg8MpEk9WaYSJJ6M0wkSb15zUTS\nouC1kYXNIxNJUm+GiSSpN09zSdIITXf67tXHrtiDW9KPYSJpQfHayOLkaS5JUm+L+sgkyRrgw8AS\n4BNV9b4Rb5KkWfDo49Fn0R6ZJFkCfAQ4GTgKOD3JUaPdKkl6bFrMRybHAGNVdRtAkkuAtcB3R7pV\nkv6ZRyCPHYs5TA4D7hj4PA4cO6JtkRYE/3hrVBZzmGRIrR7WIFkP
rG8ff5Hk+7t5G54K3Leb+1xo\nHOOjg2NchP7kd0vzGuOQfubiabNptJjDZBw4fODzcuDOwQZVdSFw4SO1AUm2VtWqR6r/hcAxPjo4\nxkeHhTzGRXsBHrgOWJnkyCR7A6cBm0a8TZL0mLRoj0yq6sEkZwNX0N0avKGqto14syTpMWnRhglA\nVW0GNo9wEx6xU2gLiGN8dHCMjw4LdoypqplbSZI0jcV8zUSStEAYJpMk2ZDkniQ3D9RekOSbSW5K\n8rdJ9m31P0pyfatfn+T4gWVe3OpjSS5IMuxW5j1uLuMbmL8iyS+S/OlAbU2S77fxnbsnxzCTuY4x\nyfPbvG1t/hNafUHuQ5jzf6ePS7Kx1W9J8taBZRbyfjw8yVVtm7cleWOrH5hkS5Lt7f2AVk/bT2NJ\nbkzyooG+1rX225OsG9WYJpvHGP+kje3GJN9I8oKBvka7L6vK18AL+DfAi4CbB2rXAX/Qpl8HvLtN\nvxD4vTb9POAnA8tcC/wruu/DfAk4edRjm+v4BuZ/HvifwJ+2z0uAW4GnA3sD3wGOGvXY5rkPlwI3\nAi9onw8ClizkfTiPMb4auKRNPwn4EXDEItiPhwIvatNPAX5A9+ikDwDntvq5wPvb9CltPwVYDVzT\n6gcCt7X3A9r0AaMe3zzH+K8ntp3uUVITYxz5vvTIZJKq+iqwY1L5WcBX2/QW4I9b229X1cR3W7YB\nT0jy+CSHAvtW1Ter29MXA6c+8ls/s7mMDyDJqXT/4xu8U+6fH2VTVb8BJh5lsyDMcYwnAjdW1Xfa\nsj+tqocW8j6EOY+xgH2SLAWeCPwG+DkLfz/eVVXfatMPALfQPfliLbCxNdvIrv2yFri4OlcD+7f9\neBKwpap2VNVOun+bNXtwKFOa6xir6httDABX032/DhbAvjRMZudm4N+16Vfx8C9LTvhj4NtV9Wu6\n/xjGB+aNt9pCNXR8SfYB3gK8c1L7YY+yWcjjg6n34TOBSnJFkm8leXOrL7Z9CFOP8TLgH4G7gNuB\nv6qqHSyi/ZjkCLozAdcAh1TVXdD9MQYObs2mGs+iGOcsxzjoTLojMVgAYzRMZud1wFlJrqc7FP3N\n4MwkzwXeD7x+ojSkj4V829xU43sncH5V/WJS+8U2Pph6jEuBl9A9ceIlwCuSnMCja4zHAA8Bvwcc\nCZyT5OkskjEmeTLdqdY3VdXPp2s6pFbT1BeMOYxxov3L6cLkLROlIc326BgX9fdM9pSq+h7d6RCS\nPBP4txPzkiwHvgicUVW3tvI4uw4/YcijXhaSacZ3LPDKJB8A9gd+m+RXwPXM8CibhWaaMY4D/7eq\n7mvzNtNdi/gfLKJ9CNOO8dXA31XVPwH3JPk6sIru/8ku6P2Y5HF0f2Q/XVVfaOW7kxxaVXe101j3\ntPpUj1gaB142qf73j+R2z8Ucx0iS5wOfoLuG99NWnvHxUo80j0xmIcnB7X0v4G3Af2uf9wf+N/DW\nqvr6RPt2WPpAktXtDqAzgMv3+IbP0lTjq6qXVtURVXUE8CHgL6vqb1iEj7KZaox0T1B4fpIntWsK\nfwB8d7HtQ5h2jLcDx7e7nfahuzj9PRb4fmz/7hcBt1TVBwdmbQIm7shax679sgk4o41zNXB/249X\nACcmOaDdFXViq43cXMeYZAXwBeA1VfWDgfaj35d7+u6Fhf4CPkt3bvmf6NL+TOCNdHdZ/AB4H7u+\n7Pk2unPRNwy8Dm7zVtGdw74V+JuJZUb9msv4Ji33DtrdXO3zKa39rcBfjHpcfcYI/Ae6GwxuBj4w\nUF+Q+3Ae/50+me5uvG10v/fzZ4tkP76E7lTNjQP/+zqF7o67K4Ht7f3A1j50P5h3K3ATsGqgr9cB\nY+312lGPrccYPwHsHGi7daHsS78BL0nqzdNckqTeDBNJUm+GiSSpN8NEktSbYSJJ6s0wkST1ZphI\nA5K8pD3a+/4kO5J8Pcm/3IPr
T5KvJnn7pPq6JLcmedKe2hZpLvyeidSk+/2P24E3AJfSPcr7pcA/\nVNWNe3A7VtJ9o/m4qtqWZBndlw3/fVVdtRvXsxdAVf12d/Wpxy6PTKRdnglQVZ+tqoeq6v9V1Zcn\ngiTJXkneluTH6X6Y6uIk+7V5RySpdgRxe5L7kvzFRMdJnpjuB6p2pvshpDcnGR+2EVW1HXgvcFH7\ng38B8PmJIEnyhCQfTHJHkruTfDS7ftDroCSbk9zb1vW3SQ4b2I6vJXl3km/SPb1hxSPxD6nHHsNE\n2uUHwEPtj/7J7TlOg/5je72c7keInkz3mJVBL6H7XZETgLcneU6rn0f3g1RPB/6I7hEu0/kg3eNB\nLgOOA/5sYN5f0T399/nAytbvRHDtBXycLiSeRve4lQ9P6vs1dI8X2ZeHP2Zfmr9RP5vGl6+F9AKe\nA3yS7o/sg3QPyzukzbsS+M8DbZ9F98d6Kd0f9AKWD8y/FjitTd8GnDQw7z8B4zNsy3Nbn2sHansB\nvwKeNlB7KbB9ij5WAfcOfP4a8PZR/zv7evS9fAS9NKCqbqE7+iDJs+keRf8h4HS63wP58UDzH9MF\nySEDtX8YmP4l3dELbdnBHy8anJ5qW7Z1D5V92K9c/gvg8cB3susn6XdNdE8F/jDdk3H3b+WnTOp6\nxnVLc+VpLmkK1f0+yCeB57XSnXSnjiasoDt6uXsW3d3Fw38fZdivdc7G3XQ/evWsqtq/vfarqv3a\n/DfTnQI7pqr2BY4f0od33Wi3M0ykJsmzk5zTfvCMJIfTHZFc3Zp8Fviv7Tcjngz8JfC5qnpwFt1f\nCry1/abGYcDZ89nGqnqI7jHkH0qyrN1KvDzJia3JU+iOiHYmOQh4+1R9SbuTYSLt8gDdr0tek+Qf\n6ULkZuCcNn8D8Cngq8AP6a5d/JdZ9v0uuuswPwT+D92F9V/PczvPoTvFdi1wP/Blugvx0F243w/4\nKfANdv1GuPSI8nsm0ggkeQPdxfk/GPW2SLuDRybSHpDk0CTHte+qPIvu6OKLo94uaXfxbi5pz9gb\n+O90F8d/BlwCfHSkWyTtRp7mkiT15mkuSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6+//ieC4/\njTJ05AAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x2445bbcd240>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#df_train_merged['song_year']=df_train_merged['song_year'].replace(np.NaN,1900)\n",
    "df_train_merged['song_year']=df_train_merged['song_year'].replace(np.NaN,2025)#即把缺失值替换成2025\n",
    "\n",
    "fig = plt.figure()\n",
    "sns.distplot(df_train_merged['song_year'],kde=False)\n",
    "plt.xlabel(\"Song Year\", fontsize=12)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x2447a036438>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAa0AAAEKCAYAAAChTwphAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XuYFdWZ7/HvCw2iBlEQEqU1kAGj\nRKLDTTJmkjyaIHomYCY60ZxRoiSMRhPGTIzoeSYqHidmkhknjrc4QYQzBLwkBsegjuMl5uINDFEE\nFUQCLajcxSCX7n7PH2sVtXq7u2na7t5d3b/P8+xnV61aVWvVZddbq6p2lbk7IiIiRdCt0hUQERFp\nLgUtEREpDAUtEREpDAUtEREpDAUtEREpDAUtEREpDAUtEREpDAUtEREpDAUtEREpjKpKV6CjO/TQ\nQ33QoEGVroaISKEsWrRog7v3b+3pKmjtxaBBg1i4cGGlqyEiUihm9se2mK5OD4qISGEoaImISGEo\naImISGHomlYL7N69m5qaGnbs2FHpqrxvvXr1orq6mh49elS6KiIie6Wg1QI1NTX07t2bQYMGYWaV\nrk6LuTsbN26kpqaGwYMHV7o6IiJ7pdODLbBjxw769etX6IAFYGb069evU7QYRaRrUNBqoaIHrExn\nmQ8R6RoUtEREpDAUtNrYli1buPnmm9u8nMcff5zf/e53bV6OiEglKWi1sX0NWu5OfX39PpejoCUi\n7Wn19OEVKVdBq41NmzaNV199leOPP55LLrmEk08+mREjRjB8+HDmz58PwKpVqzjmmGP4+te/zogR\nI1izZg0zZszgqKOO4jOf+Qxf+9rXuPjiiwFYv349X/ziFxk9ejSjR4/mt7/9LatWreLWW2/l+uuv\n5/jjj+fXv/51JWdZRKTN6Jb3NnbdddexZMkSFi9eTG1tLdu3b+eggw5iw4YNjB07lgkTJgDw8ssv\nM3PmTG6++WbWrl3LNddcw3PPPUfv3r056aSTOO644wCYOnUql1xyCZ/85CdZvXo1p5xyCsuWLeOC\nCy7gAx/4AN/+9rcrObsiIm1KQasduTtXXHEFTzzxBN26deP111/nzTffBODDH/4wY8eOBeCZZ57h\n05/+NH379gXgzDPP5JVXXgHgf/7nf1i6dOmeab799tts27atnedERKQyFLTa0Zw5c1i/fj2LFi2i\nR48eDBo0aM9/pA488MA9+dy90WnU19fz5JNPsv/++7d5fUVEOhpd02pjvXv33tMS2rp1KwMGDKBH\njx489thj/PGP5Z/cP2bMGH71q1+xefNmamtr+dnPfrZn2Lhx47jxxhv39C9evPg95YiIdFYKWm2s\nX79+nHjiiRx77LEsXryYhQsXMmrUKObMmcPRRx9ddpyBAwdyxRVXcMIJJ/DZz36WYcOG0adPHwBu\nuOEGFi5cyMc//nGGDRvGrbfeCsDnP/957r33Xt2IISKdmk4PtoOf/vSne82zZMmSBv1f/vKXmTJl\nCrW1tXzhC19g3LhxABx66KHceeed7xn/qKOO4vnnn2+dCouIdFBt1tIys9vN7C0zW5Kk9TWzh81s\nefw+JKabmd1gZivM7HkzG5GMMynmX25mk5L0kWb2QhznBovPI2pJGR3RVVddxfHHH8+xxx7L4MGD\nOf300ytdJRGRimvL04N3AONL0qYBj7j7UOCR2A9wKjA0fqYAt0AIQMCVwAnAGODKLAjFPFOS8ca3\npIyO6oc//CGLFy/mpZde4oYbbtAzAkVEaMOg5e5PAJtKkicCs2L3LOD0JH22B08BB5vZYcApwMPu\nvsndNwMPA+PjsIPc/UkPt9rNLpnWvpQhIiIF0d43YnzQ3dcBxO8BMX0gsCbJVxPTmkqvKZPekjJE\nRKQgOsrdg+XOfXkL0ltSxnszmk0xs4
VmtnD9+vV7mayIiLSX9g5ab2an5OL3WzG9BjgiyVcNrN1L\nenWZ9JaU8R7ufpu7j3L3Uf3799+nGRQRkbbT3re83wdMAq6L3/OT9IvNbB7hpout7r7OzB4C/im5\n+WIccLm7bzKzbWY2FngaOBf495aU0RozNfLS2a0xmT0W/eDcZuV78MEHmTp1KnV1dXz1q19l2rRp\nDYbv3LmTc889l0WLFtGvXz/uvPNOBg0a1Kp1FRFpT215y/tc4Engo2ZWY2aTCYHkc2a2HPhc7AdY\nAKwEVgD/AXwdwN03AdcAz8bP9JgGcCHwkzjOq8ADMX2fyiiquro6LrroIh544AGWLl3K3LlzGzyT\nEGDGjBkccsghrFixgksuuYTLLrusQrUVEWkdbdbScvezGxl0cpm8DlzUyHRuB24vk74QOLZM+sZ9\nLaOInnnmGYYMGcJHPvIRAM466yzmz5/PsGHD9uSZP38+V111FQBnnHEGF198Me6u2+dFpLA6yo0Y\nso9ef/11jjgiv0RXXV3N66+/3mieqqoq+vTpw8aNG9u1niIirUlBq6DKPQm+tAXVnDwiIkWioFVQ\n1dXVrFmT/+2spqaGww8/vNE8tbW1bN26dc87ukREikhBq6BGjx7N8uXLee2119i1axfz5s3b8xbk\nzIQJE5g1Kzwc5J577uGkk05SS0tE3peRl85u9Tum94We8t4KmnuLemuqqqrixhtv5JRTTqGuro7z\nzz+fj33sY3z3u99l1KhRTJgwgcmTJ3POOecwZMgQ+vbty7x589q9niIirUlBq8BOO+00TjvttAZp\n06dP39Pdq1cv7r777vaulohIm9HpQRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQzd\n8t4KVk8f3qrTO/K7L+w1z/nnn8/999/PgAEDWLJkyXuGuztTp05lwYIFHHDAAdxxxx2MGDGiVesp\nItLe1NIqqK985Ss8+OCDjQ5/4IEHWL58OcuXL+e2227jwgsvbMfaiYi0DQWtgvrUpz7V5HME58+f\nz7nnnouZMXbsWLZs2cK6da3yzksRkYpR0OqkmvPqEhGRolHQ6qT0WhIR6YwUtDqp5ry6RESkaBS0\nOqkJEyYwe/Zs3J2nnnqKPn36cNhhh1W6WiIi74tueW8FzblFvbWdffbZPP7442zYsIHq6mquvvpq\ndu/eDcAFF1zAaaedxoIFCxgyZAgHHHAAM2fObPc6ioi0NgWtgpo7d26Tw82Mm266qZ1qIyLSPnR6\nUERECkNBS0RECkNBq4XK3VJeRJ1lPkSka1DQaoFevXqxcePGwu/w3Z2NGzfSq1evSldFRKRZdCNG\nC1RXV1NTU8P69esrXZX3rVevXlRXV1e6GiIizaKg1QI9evRg8ODBla6GiEiXo9ODIiJSGApaIiJS\nGApaIiJSGBUJWmZ2iZm9aGZLzGyumfUys8Fm9rSZLTezO82sZ8y7X+xfEYcPSqZzeUx/2cxOSdLH\nx7QVZjYtSS9bhoiIFEO7By0zGwh8Exjl7scC3YGzgO8D17v7UGAzMDmOMhnY7O5DgOtjPsxsWBzv\nY8B44GYz625m3YGbgFOBYcDZMS9NlCEiIgVQqdODVcD+ZlYFHACsA04C7onDZwGnx+6JsZ84/GQL\nL4aaCMxz953u/hqwAhgTPyvcfaW77wLmARPjOI2VISIiBdDuQcvdXwd+CKwmBKutwCJgi7vXxmw1\nwMDYPRBYE8etjfn7pekl4zSW3q+JMkREpAAqcXrwEEIraTBwOHAg4VReqexxE+Vet+utmF6ujlPM\nbKGZLewMfyAWEeksKnF68LPAa+6+3t13Az8H/gI4OJ4uBKgG1sbuGuAIgDi8D7ApTS8Zp7H0DU2U\n0YC73+buo9x9VP/+/d/PvIqISCuqRNBaDYw1swPidaaTgaXAY8AZMc8kYH7svi/2E4c/6uGhf/cB\nZ8
W7CwcDQ4FngGeBofFOwZ6EmzXui+M0VoaIiBRAJa5pPU24GeI54IVYh9uAy4BvmdkKwvWnGXGU\nGUC/mP4tYFqczovAXYSA9yBwkbvXxWtWFwMPAcuAu2JemihDREQKoCLPHnT3K4ErS5JXEu78K827\nAzizkelcC1xbJn0BsKBMetkyRESkGPREDBERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRER\nKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwF\nLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRER\nKQwFLRERKQwFLRERKQwFLRERKQwFLRERKQwFLRERKYxmBS0ze6Q5ac1lZgeb2T1m9pKZLTOzT5hZ\nXzN72MyWx+9DYl4zsxvMbIWZPW9mI5LpTIr5l5vZpCR9pJm9EMe5wcwsppctQ0REiqHJoGVmvcys\nL3ComR0Sd/p9zWwQcPj7KPdHwIPufjRwHLAMmAY84u5DgUdiP8CpwND4mQLcEuvWF7gSOAEYA1yZ\nBKFbYt5svPExvbEyRESkAPbW0vo7YBFwdPzOPvOBm1pSoJkdBHwKmAHg7rvcfQswEZgVs80CTo/d\nE4HZHjwFHGxmhwGnAA+7+yZ33ww8DIyPww5y9yfd3YHZJdMqV4aIiBRAVVMD3f1HwI/M7Bvu/u+t\nVOZHgPXATDM7jhAEpwIfdPd1sdx1ZjYg5h8IrEnGr4lpTaXXlEmniTJERKQAmgxaGXf/dzP7C2BQ\nOo67z25hmSOAb7j702b2I5o+TWflqtSC9GYzsymE04sceeSR+zKqiIi0oebeiPH/gB8CnwRGx8+o\nFpZZA9S4+9Ox/x5CEHszntojfr+V5D8iGb8aWLuX9Ooy6TRRRgPufpu7j3L3Uf3792/RTIqISOtr\n7i3vo4AT3f3r7v6N+PlmSwp09zeANWb20Zh0MrAUuA/I7gCcRLhuRkw/N95FOBbYGk/xPQSMizeI\nHAKMAx6Kw7aZ2dh41+C5JdMqV4aIiBRAs04PAkuADwHrWqncbwBzzKwnsBI4jxBA7zKzycBq4MyY\ndwFwGrAC2B7z4u6bzOwa4NmYb7q7b4rdFwJ3APsDD8QPwHWNlCEiIgXQ3KB1KLDUzJ4BdmaJ7j6h\nJYW6+2LKn148uUxeBy5qZDq3A7eXSV8IHFsmfWO5MkREpBiaG7SuastKiIiINEdz7x78VVtXRERE\nZG+aFbTMbBv5beM9gR7An9z9oLaqmIiISKnmtrR6p/1mdjrh0UkiIiLtpkVPeXf3XwAntXJdRERE\nmtTc04N/nfR2I9z5t09PmRAREXm/mnv34OeT7lpgFeHhsyIiIu2mude0zmvrioiIiOxNc589WG1m\n95rZW2b2ppn9zMyq9z6miIhI62nujRgzCc/tO5zwmo//imkiIiLtprlBq7+7z3T32vi5A9Djz0VE\npF01N2htMLO/NbPu8fO3wMa2rJiIiEip5gat84G/Ad4gPOn9DOLT1kVERNpLc295vwaY5O6bAcys\nL+GlkOe3VcVERERKNbel9fEsYEF4lxXw521TJRERkfKaG7S6xbcDA3taWs1tpYmIiLSK5gaefwF+\nZ2b3EB7f9DfAtW1WKxERkTKa+0SM2Wa2kPCQXAP+2t2XtmnNRERESjT7FF8MUgpUIiJSMS16NYmI\niEglKGiJiEhhKGiJiEhhKGiJiEhh6L9WIiKyV6unD49dl1a0HmppiYhIYShoiYhIYShoiYhIYSho\niYhIYShoiYhIYShoiYhIYVQsaJlZdzP7vZndH/sHm9nTZrbczO40s54xfb/YvyIOH5RM4/KY/rKZ\nnZKkj49pK8xsWpJetgwRESmGSra0pgLLkv7vA9e7+1BgMzA5pk8G
Nrv7EOD6mA8zGwacBXwMGA/c\nHANhd+Am4FRgGHB2zNtUGSIiUgAVCVpmVg38L+Ansd8Irz25J2aZBZweuyfGfuLwk2P+icA8d9/p\n7q8BK4Ax8bPC3Ve6+y5gHjBxL2WIiEgBVKql9W/Ad4D62N8P2OLutbG/BhgYuwcCawDi8K0x/570\nknEaS2+qDBERKYB2D1pm9lfAW+6+KE0uk9X3Mqy10svVcYqZLTSzhevXry+XRUREKqASLa0TgQlm\ntopw6u4kQsvrYDPLnoVYDayN3TXAEQBxeB9gU5peMk5j6RuaKKMBd7/N3Ue5+6j+/fu3fE5FRKRV\ntXvQcvfL3b3a3QcRbqR41N3/N/AYcEbMNgmYH7vvi/3E4Y+6u8f0s+LdhYOBocAzwLPA0HinYM9Y\nxn1xnMbKEBGRAuhI/9O6DPiWma0gXH+aEdNnAP1i+reAaQDu/iJwF7AUeBC4yN3r4jWri4GHCHcn\n3hXzNlWGiIgUQEVfTeLujwOPx+6VhDv/SvPsAM5sZPxrgWvLpC8AFpRJL1uGiIgUQ0dqaYmISAc0\n8tLZla7CHgpaIiJSGApaIiJSGApaIiJSGApaIiJSGApaIiJSGApaIiJSGApaIiJSGApaIiJSGApa\nIiJSGApaIiJS1urpwytdhfdQ0BIRkcJQ0BIRkcJQ0BIRkcJQ0BIRkcJQ0BIRkVbRHq8wUdASEZHC\nUNASEZHCqKp0BUREpPNo6/92qaUlIiLv0R7Xp1pCQUtERApDQUtERApDQUtERApDQUtERApDQUtE\nRApDQUtERApDQUtERApDQUtERApDQUtERApDQUtERAqj3YOWmR1hZo+Z2TIze9HMpsb0vmb2sJkt\nj9+HxHQzsxvMbIWZPW9mI5JpTYr5l5vZpCR9pJm9EMe5wcysqTJERKQYKtHSqgX+wd2PAcYCF5nZ\nMGAa8Ii7DwUeif0ApwJD42cKcAuEAARcCZwAjAGuTILQLTFvNt74mN5YGSIiUgDtHrTcfZ27Pxe7\ntwHLgIHARGBWzDYLOD12TwRme/AUcLCZHQacAjzs7pvcfTPwMDA+DjvI3Z90dwdml0yrXBkiIlIA\nFb2mZWaDgD8HngY+6O7rIAQ2YEDMNhBYk4xWE9OaSq8pk04TZYiIdDkjL53N6unDG7xOpK1fLfJ+\nVex9Wmb2AeBnwN+7+9vxslPZrGXSvAXp+1K3KYTTixx55JH7MqqISCFlryK5t3eFK7IXFWlpmVkP\nQsCa4+4/j8lvxlN7xO+3YnoNcEQyejWwdi/p1WXSmyqjAXe/zd1Hufuo/v37t2wmRUSk1VXi7kED\nZgDL3P1fk0H3AdkdgJOA+Un6ufEuwrHA1nhq7yFgnJkdEm/AGAc8FIdtM7OxsaxzS6ZVrgwRESmA\nSpwePBE4B3jBzBbHtCuA64C7zGwysBo4Mw5bAJwGrAC2A+cBuPsmM7sGeDbmm+7um2L3hcAdwP7A\nA/FDE2WIiEgBtHvQcvffUP66E8DJZfI7cFEj07oduL1M+kLg2DLpG8uVISIixaAnYoiISGEoaImI\nSGEoaImIdCHZre1FpaAlItLFdPQ/EDdFQUtEpBMbeenswreuUgpaIiJSGApaIiJdQJFPCaYUtERE\npDAUtEREpDAUtEREpDAUtEREOpnOdLdgKQUtEREpDAUtEREpjIq9uVhERNpOfov7pRWtR2tTS0tE\npBPozNexUgpaIiJSGApaIiIFkbamyj1TsLM89aIpCloiIlIYCloiIgWyevrwBi2qrtC6SiloiYh0\nYF3lBovmUtASEelAOtv7r1qbgpaISAfUlU8BNkVBS0SkwtSyaj4FLRGRNlJ6qq+p4FR6g0WRtGe9\nFbRERCpArauWUdASEWlF5YJRY9enity6yrR38NUDc0VEWllnfVhtR6CWloiIFIaCloiIFIaClojI\nPtqXuwKldXW5oGVm483sZTNb
YWbTKl0fESmGfbnBQtpOlwpaZtYduAk4FRgGnG1mwypbKxHpKPbW\nguoMd/sVXZcKWsAYYIW7r3T3XcA8YGKF6yQi7aw0MDX15HQFqY6lqwWtgcCapL8mpolIO2msJdNU\nKycLLGn36unD94xTOmxv+UDBqKjM3Stdh3ZjZmcCp7j7V2P/OcAYd/9GSb4pwJTY+1HgZeBQYENM\na6y7qWEdIV9HqIPmSfPeGvk6Qh00T03n+7C796e1uXuX+QCfAB5K+i8HLm/muAv31t3R83WEOmie\nNO+a945bh7bI19qfrnZ68FlgqJkNNrOewFnAfRWuk4iINFOXeoyTu9ea2cXAQ0B34HZ3f7HC1RIR\nkWbqUkELwN0XAAtaMOptzeju6Pk6Qh1aO19HqEOl8nWEOlQqX0eoQ2vn6wh1aIt8rapL3YghIiLF\n1tWuaYmISIF1idODZnY78FfAW+5+bOw/HegF7AAOJPxnqzfQF7A46m5CYO+epO2ZLODx0y1+l+Yp\ninr2/QAma6K/33nOllt97O9Wkt5Y/rbSltNv7rQrNe8dTVPz217LqLXr0JL6pfuZUvVxevsyzZbU\nYVcsqxdQS75PrCXsJ/dL6lGb5O9B/vveDmyO3y8Co2P3V+LntKzf3Z9rrCJdpaV1BzC+pH8d8AYh\neH0POBK4BvgUYQE/Q7j2tYGwIP8NqIvDpgK/IayMewgL3AgB8DJgZyxnVywDYA5we+zeDlxNvqN+\ngbCiAR4AfhK7dwI3xukA/Gsy7buB7yf5vp3M37XJ9J4k///EXCD7x6bHfDsJ28EvgOXJtO+N3e/G\nur4V+y+J82mEP2qvTvKtTKa9PJm/NwgbtgNbCAcIHoevjnXtBvwJ2BbHuYv8j+C7gWWxXIA/ENYf\nsd4/Tcq9iDygfpN8+d8dy6mPn5uT+v1rLBvgCSD7wdQD1xHWe5avJnbPJmwvWb4byf04Lg+AJYTl\nB2G9Z8uhHrgimfY/A/8ndq8qqcN3COvJYr5/i8PuT+rwJ+AfkzpcRb68HiLfprYTtqds+7gm6Z4N\n3BK7dyZ1yKa3JXa/QNiWALYmdcjyZdvogjiPxPn8Z/J1swlYH7t/Qfh9EOsyl3xnvIJ8+/9dkm8z\n8Evy5fejZJz/CyyK6U8BM5NpX5iMk87jbMJ+Ict3NfnO/ZlknMdj2RDWcbY8dxN2xNk428i3qfo4\nvD7p3xLz1sXx6+JnC/nyW0S+DmvjJwtgO2LdswPmpTGtNtYr2853l5T1KvlyzfK9HYevjmXXxnwO\nvBSnWxXnZ1Mcpw5YSNgPdifsF2YSfpd/AL5KCGROeIDDFTHf0Jjv1Ng9hfD7HZr0Z9tgWV0iaLn7\nE4SFnfYfCWyP3Q8RVsqNwFrCcjkydh9EOFp4kbCyusd878TJfRz479jdA/gz4L9i/26gZ+weSb5z\n2C8Oy/rPId+gj6Fhq6MnYaMCmBHLABgBHBa7q4Ah5D/uq8g39v7k63kU8KHYbcAPk/r1IQ9MI4EB\nsbs7YSdwSOx/iXyH3DuZh56EDTn70c4n1ysOM+ADcZw68mBZleTLdmSfTpZDd8IOsGecxsPkO79j\nkroaebCFhuvp+PjdLearIl9e08iX6xhC4Mry9k3q8W3g4Ng9Os5Lxsh3bBcS1m9W96GxO/sRG4C7\nf498+U0jrFOA6qQO5u4/SOo3LZmXjyZ16ElYt1ldp5MvoyMJ6xTCMiYp98pknNGEZ3Jm08vqkE0v\nW089gKNj9/40XA7XJt1DknnvTtiGsvXxVhyXWObhSb7soAhCwO4euz8Yp5nNx37JsN8l48xMpvdn\n5Muuu7vfmtSvnrANA/wl+fbQjfzgAsLBSjbto8h/j73Id+IA/cjPwBwIbCQPpBACXNo6yYbtJmw7\n3YHF5Mv5Q8k4PQi/vWz69ck81hK2y+z3kQWrbuT7sPo4Xja/vQkHhdlvcncst3vMl7WcDiI/YO
oL\nPBLzbYvLdmAc527giDjfhxECe3fy9X10nJcPpd3u/lRMu8+Dp4CDzSzbt71XW/4JrCN9gEHAkqR/\nEfDH2H1NXFETY75d5K2cNXGFf4VwdOKEDTU7EnmXsCPJjoDuJxwBZcM2Jd2/p+GRUnbk9LVYZnY0\n9Kck39ZYFycckbybDEs/25Lu1Ul3fdK9M5lWVlbWva6kP/2kZb5Tki/tXpyU96Vk/nbEcrO8ryTd\n7ybdOwk/snQZZd1/KJl2Or/pPL6SdK9M5reu5PvtZLy6JL2e0DpL5y/Lt7NknLeTfOlR8O+byJfW\n9ZGkO61PPWGHV27dbk/KyraVbLx0u/lCkv5uHJYti13JNL5csgzSOswqWZbZMtpdkv/pkvlNt510\nuZQuv7Ss+pLx0u2tXHpp/ndLurNydxNaBNmwqmTYTvLf9E7gV0m+Hcm83ErDFnJadnbaK1uu6fDS\n73eS/jVJ3tpkHb/RxLLcWNKf/j5qk+/6MsstK2tLMu7WkrLeSPKuL6lD9n1HLGM9eXDcQThQfzd2\nzyWcVdoZp/lcLHcV4cD5/rhORsX97wZgcrJvfiQbpj8XN/QdoK+ZLSIcadQTTi39F2ED6g78DWEB\nQmi9DIj5lpEHt/2AfyJsuBBaCNlRaE/C0WXWvD6e/ChtJ/lRzd8RjqSyjT47AoVwtJQdeWXX4bIN\nfTP5Ud7+5Ef3aVBId8jdYjnZ6cIlMR1CSzI70qxNpgXh1EOWr2dSZnrECJA+zG1ynL+sddojmcaQ\npE5P0vCa4KExT9Yyy9Qm/V8lP7qvJd9pQGilZN2vJeNkdczsT77Onkjy1RGWcTaNPyXDfpOMX084\nqszybSFfZkcl+Zz3ylqqn0jGWUm+/L3kO31e5islw/dLhmXbD4TTXdlReVWcp+z0XrYustOk0DCg\nZC2A02m4w8z2F9nyeDd2j0rq8GhSh9L52EG+/WdBgeQ7a/29S8N1SDI8W17Z6fisv0fSnc0Hcd5H\nJv1vJ/XvCXw46f7LpA5VSb5JsX83Da+/ZnXN8vUgDxoQzuBk42QBBfID1Gw+smUC4XZxS4ZvTYb9\nNOneQX6WZBd5iybbp2R1eJSGv6UDk/l9MJneO0me3YSWW7but9DwtwjhkspOwr5yP+B5QoDqQdgv\nTYjz+213H0F4sMMAwj4wm1a53wZ7G9aVg9arhJbWSMJTMXa6+zjg84Qf6GrCdZVZhA0k+95FOOV2\nJWFj2En4cWcthjrCCso2lLfJm9crCUcou8g3LgOOi3XqTljR95FveHXkR01VsbuecDT/A/IjrP+I\n03BCyy/bkb9Gfg5+F2Gnlx1FTSHfONJXtGwHfhu764ATyHcsz5Hv7LP6Z9tRelonOx3VLUnLTuXU\nkV+gzeY9OwXSIxkv3TkMSro5hlVNAAAGoklEQVTHJvV+nXCdIzty/7Mk32WEI2kI6ztbVvXkpznr\ngUtpGDCyC8pOOGr0ZHrZuvgj+c6mHviXZLlcRX49Yy3hQCg7gEhPO2XL0uM8vJwsi3Sn+bFknuYQ\n1j2EVvw28oOYF+P06gnXsLLguJFwZiE9iMmWxWtxGttjvpeTcnvRMGBnp5w3xOllLbcsWOx298+R\nB52VNNwu55EH2Y1JPbLtMqtbWocj4nc9YZln16r+BPw6WS5Lk3LOI/x2IWz7/xmnV+vuB5AHj+2E\n01pZvuya1E5CayCTBY4qwhmJmUlZP0+G18d6ZdvuH8h/G1vIA0YaOLMDggNi/xnk+44t5MscQqs4\nGydrkWX125AMW09+4Lsy1mFH/KyN6dsI152z7Xw34ZRhdsCyuaQOWX3PJPyO/5b8GpcTTh1+kLDu\nxxC20W7k+42spX4E4brw4UldqsgDMIQDz7U0oisHrX4AZtYNuJh8wzPCwn8s9v8DYcVsIbSaqgjP\nLKwm/KhvJ/ygs2sdNxF2qhA2hDcJ14tWE86ND4plZBfG6w
k7ueyOnDsI12+yjeQXhI3dCC2CjbEO\n8wgrvydhQ3mU5Nw9YaPtRjg6yzbERXF6Qwkb2kjyDf/j5DuBfyS/qeIdwoaa5ZtDuH5AXA6Px2lt\njfMHYWOeSH5K4nLyAPEO4UaF7JTI35MfeU4mXOSH0Jr6cex+l7COsqPYX8blCuFGifXkAf2EZNmt\nJL8Wd1Mso3ssby75OjuUfEf2HcI1PGIZi8h/2H3Jj5xXEn6Y2ZH3i4Tz/8RpZTvnZ+M8GXnAy3Yo\nPyUPDK+S72h/Q7iwDeGHPpw80L1K2AYgtMCejPO0jXCN60DCTubL5IHvAOBnhCPdLTH/frGeHyI/\nsu5JHmS2kgfet4CzyVvEBxKu4/ah4XXMOjMbnJQ7h3Dwk123WUTeis1uVqiKy+eX5Nd6HyU/ys9u\nROpGaD2+Euu0kbD9Z/uwZeStnlXkB0gPkNtqZp+IZWbTz7aBlYQbLqritO9Jlv8ZSf6rgBPjsNWE\n7at/HPYW+cHYO8AXaXgWISt3P8I2l2076R3L2Q1GTjgTks1fdvowO0BIW/k9CGce0gOu7ID3DPJA\nuo18G32JcC3dCQGvD/kBz464/OoI2/VrMX05YZvbFct7mTy4fJGw3nbFuq0gHJSdZ2YWp7E/YXt9\nmbBNvmFmYwm/swkWjAW2unu2HN6jS/y52MzmAp8hbChvEo7OjyasQCcs6Gxjy06jiUjzZTtMaZtl\nUU/5/VJHWO5ZHbLA3o0QsB4nv/TRjbDv3UkIjjsJp2bfILSMzyPc4b0dOM/dFzZWWJcIWiIi0jmo\nRSEiIoWhoCUiIoWhoCUiIoWhoCUiIoWhoCUiIoWhoCUijTKz7nvPJdJ+FLRE2oCZHWhmvzSzP5jZ\nEjP7kpmdbGa/N7MXzOx2M9sv5l1lZleb2XNx2NExvb+ZPRzTf2xmfzSzQxsp7xozm5r0X2tm34zd\nl5rZs2b2vJldneT5hZktMrMXzWxKkv6OmU03s6cJj5oS6TAUtETaxnhgrbsf5+7HEp7zdgfwJXcf\nTng6woVJ/g3xGW23kL9m5krg0Zh+L+Fp7Y2ZQXhGXvaUl7OAOWY2jvAElDGE576NNLNPxXHOj48x\nGwV808z6xfQDCQ+XPsHd0+ctilScgpZI23gB+KyZfd/M/pLw+K7X3D17DNEswrvbMj+P34vIn7P4\nScLjunD3B8mfIfke7r4K2Ghmfw6MA37v7htj9zjCI6eeIzwJJntdyDfN7A+Ed04dkaTXER77JNLh\ndIk3F4u0N3d/xcxGEt7G+j3yd641JnvxXx3573JfH8/zE8IrdD5E/sJRA77n7j9OM5rZZ4DPAp9w\n9+1m9jj5u7Z2uHsdIh2QWloibcDMDie8ZPQ/CS/b/AtgkJllLzE8h/D+pqb8hvB6HOJpvkOazs69\nhNOSowmvxSB+n29mH4jTGWhmAwgPSN0cA9bR5A95FunQ1NISaRvDgR+YWfaivAsJgeJuM6siPP39\n1ibGh/DK97lm9iVCgFtHeFJ3We6+y8weA7ZkLSV3/28zOwZ4Mjxsm3cIr5V4ELjAzJ4nPHX7qRbP\nqUg70gNzRTqoeHdhnbvXxldq3OLuxzeRP3u69pnuvry96inSntTSEum4jgTuisFoF/C1xjKa2TDC\niwvvVcCSzkwtLZECibelP1Jm0MnxbkGRTk1BS0RECkN3D4qISGEoaImISGEoaImISGEoaImISGEo\naImISGEoaImISGH8f/sC2Z93Pm+VAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x24451969ba8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(x=\"song_year\", hue=\"target\",data=df_train_merged)\n",
    "#发现target为1和0分布都较为平均，不受年份影响"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#用中值填补缺失值\n",
    "median=np.median(df_train_merged['song_year'])\n",
    "df_train_merged['song_year']=df_train_merged['song_year'].replace(2025,median)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    9.313710e+06\n",
       "mean     2.454552e+05\n",
       "std      9.563275e+04\n",
       "min      1.850000e+02\n",
       "25%      2.094440e+05\n",
       "50%      2.389680e+05\n",
       "75%      2.728220e+05\n",
       "max      1.217385e+07\n",
       "Name: song_length, dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train_merged['song_length'].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "歌曲长度大致分布在2e+05至2.7e+05之内"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\scipy\\stats\\stats.py:1633: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n",
      "  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAENCAYAAADKcIhSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFrxJREFUeJzt3X+wZ3V93/HnS1aUVBGUlSG74JK4\n/kCn/roFGtNExeJCOi5/aGbRhFU33Uo1Y1OnEZpMSTRJsZ2EyMQfxcAIThCJ1bJjNLhFrGkF5FIU\n+RHDRhTuQGV1AbUkWsi7f5zPytf17t7v9/vZvd9d9/mY+c4953M+53w+n+Eur/s553zPSVUhSVKP\nx826A5KkA59hIknqZphIkroZJpKkboaJJKmbYSJJ6maYSJK6GSaSpG6GiSSp24pZd2C5HHXUUbVm\nzZpZd0OSDig33XTTt6pq5VL1DpowWbNmDfPz87PuhiQdUJJ8Y5x6nuaSJHUzTCRJ3QwTSVI3w0SS\n1G2sMEny9SRfSfKlJPOt7KlJtia5s/08spUnyYVJtiW5JcmLR46zsdW/M8nGkfKXtONva/tm2jYk\nSctvkpnJy6vqhVU119bPAa6pqrXANW0d4DRgbftsBt4PQzAA5wEnAScC5+0Mh1Zn88h+66ZpQ5I0\nGz2nudYDl7blS4EzRsovq8H1wBFJjgFeBWytqh1V9QCwFVjXth1eVdfV8NrHy3Y51iRtSJJmYNww\nKeAzSW5KsrmVHV1V9wG0n09v5auAe0b2XWhleypfWKR8mjYkSTMw7pcWX1pV9yZ5OrA1yV/voW4W\nKaspyvdkrH1a8G0GOO6445Y4pCRpWmOFSVXd237en+QTDNc8vpnkmKq6r51iur9VXwCOHdl9NXBv\nK3/ZLuWfa+WrF6nPFG3s2u+LgIsA5ubmlgqo3br8hrsXLX/dSQaUJMEYp7mS/KMkT965DJwK3Aps\nAXbekbURuKotbwHOandcnQw81E5RXQ2cmuTIduH9VODqtu27SU5ud3GdtcuxJmlDkjQD48xMjgY+\n0e7WXQFcXlV/meRG4Mokm4C7gde2+p8CTge2AQ8DbwSoqh1J3gXc2Oq9s6p2tOWzgQ8BhwGfbh+A\n8ydpQ5I0GxluoPrJNzc3V9M+6NHTXJIOVkluGvlKyG75DXhJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3\nw0SS1M0wkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3\nw0SS1M0wkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUbcWsO3Agu/yGuxctf91J\nxy1zTyRptpyZSJK6GSaSpG6GiSSpm2EiSepmmEiSuhkmkqRuY4dJkkOS3Jzkk239+CQ3JLkzyUeT\nHNrKn9DWt7Xta0aOcW4r/2qSV42Ur2tl25KcM1I+cRuSpOU3yczkbcAdI+vvBi6oqrXAA8CmVr4J\neKCqnglc0OqR5ARgA/A8YB3wvhZQhwDvBU4DTgDObHUnbkOSNBtjhUmS1cAvAX/a1gO8AvhYq3Ip\ncEZbXt/WadtPafXXA1dU1fer6i5gG3Bi+2yrqq9V1Q+AK4D1U7YhSZqBcWcmfwz8JvAPbf1pwINV\n9UhbXwBWteVVwD0AbftDrf4Py3fZZ3fl07TxI5JsTjKfZH779u1jDlWSNKklwyTJvwDur6qbRosX\nqVpLbNtb5Uu1/1hB1UVVNVdVcytXrlxkF0nS3jDOs7leCrw6yenAE4HDGWYqRyRZ0WYGq4F7W/0F\n4FhgIckK4CnAjpHynUb3Waz8W1O0IUmagSVnJlV1blWtrqo1DBfQP1tVrweuBV7Tqm0ErmrLW9o6\nbftnq6pa+YZ2J9bxwFrgi8CNwNp259
ahrY0tbZ9J25AkzUDPU4PfAVyR5PeAm4GLW/nFwIeTbGOY\nLWwAqKrbklwJ3A48Arylqh4FSPJW4GrgEOCSqrptmjYkSbORg+UP+rm5uZqfn59q3909an53fAS9\npJ8USW6qqrml6vkNeElSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS\n1M0wkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS\n1M0wkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3ZYMkyRPTPLF\nJF9OcluS323lxye5IcmdST6a5NBW/oS2vq1tXzNyrHNb+VeTvGqkfF0r25bknJHyiduQJC2/cWYm\n3wdeUVUvAF4IrEtyMvBu4IKqWgs8AGxq9TcBD1TVM4ELWj2SnABsAJ4HrAPel+SQJIcA7wVOA04A\nzmx1mbQNSdJsLBkmNfheW318+xTwCuBjrfxS4Iy2vL6t07afkiSt/Iqq+n5V3QVsA05sn21V9bWq\n+gFwBbC+7TNpG5KkGRjrmkmbQXwJuB/YCvwt8GBVPdKqLACr2vIq4B6Atv0h4Gmj5bvss7vyp03R\nhiRpBsYKk6p6tKpeCKxmmEk8d7Fq7ediM4Tai+V7auNHJNmcZD7J/Pbt2xfZRZK0N0x0N1dVPQh8\nDjgZOCLJirZpNXBvW14AjgVo258C7Bgt32Wf3ZV/a4o2du3vRVU1V1VzK1eunGSokqQJjHM318ok\nR7Tlw4BXAncA1wKvadU2Ale15S1tnbb9s1VVrXxDuxPreGAt8EXgRmBtu3PrUIaL9FvaPpO2IUma\ngRVLV+EY4NJ219XjgCur6pNJbgeuSPJ7wM3Axa3+xcCHk2xjmC1sAKiq25JcCdwOPAK8paoeBUjy\nVuBq4BDgkqq6rR3rHZO0IUmajRwsf9DPzc3V/Pz8VPtefsPdE9V/3UnHTdWOJO1vktxUVXNL1fMb\n8JKkboaJJKmbYSJJ6maYSJK6GSaSpG6GiSSpm2EiSepmmEiSuhkmkqRuhokkqZthIknqZphIkroZ\nJpKkboaJJKmbYSJJ6maYSJK6GSaSpG6GiSSpm2EiSepmmEiSuhkmkqRuhokkqZthIknqZphIkroZ\nJpKkboaJJKmbYSJJ6maYSJK6GSaSpG6GiSSpm2EiSepmmEiSuhkmkqRuS4ZJkmOTXJvkjiS3JXlb\nK39qkq1J7mw/j2zlSXJhkm1Jbkny4pFjbWz170yycaT8JUm+0va5MEmmbUOStPzGmZk8Ary9qp4L\nnAy8JckJwDnANVW1FrimrQOcBqxtn83A+2EIBuA84CTgROC8neHQ6mwe2W9dK5+oDUnSbCwZJlV1\nX1X977b8XeAOYBWwHri0VbsUOKMtrwcuq8H1wBFJjgFeBWytqh1V9QCwFVjXth1eVddVVQGX7XKs\nSdqQJM3ARNdMkqwBXgTcABxdVffBEDjA01u1VcA9I7sttLI9lS8sUs4Ubeza381J5pPMb9++fZKh\nSpImMHaYJHkS8F+Bf1NV39lT1UXKaoryPXZnnH2q6qKqmququZUrVy5xSEnStMYKkySPZwiSP6uq\nj7fib+48tdR+3t/KF4BjR3ZfDdy7RPnqRcqnaUOSNAPj3M0V4GLgjqr6o5FNW4Cdd2RtBK4aKT+r\n3XF1MvBQO0V1NXBqkiPbhfdTgavbtu8mObm1ddYux5qkDUnSDKwYo85LgV8FvpLkS63s3wPnA1cm\n2QTcDby2bfsUcDqwDXgYeCNAVe1I8i7gxlbvnVW1oy2fDXwIOAz4dPswaRuSpNlYMkyq6n+y+DUK\ngFMWqV/AW3ZzrEuASxYpnweev0j5tydtQ5K0/PwGvCSpm2EiSepmmEiSuhkmkqRuhokkqZthIknq\nZp
hIkroZJpKkboaJJKmbYSJJ6maYSJK6GSaSpG6GiSSpm2EiSepmmEiSuhkmkqRuhokkqZthIknq\nZphIkroZJpKkboaJJKmbYSJJ6maYSJK6GSaSpG6GiSSpm2EiSepmmEiSuhkmkqRuhokkqZthIknq\nZphIkroZJpKkbkuGSZJLktyf5NaRsqcm2ZrkzvbzyFaeJBcm2ZbkliQvHtlnY6t/Z5KNI+UvSfKV\nts+FSTJtG5Kk2RhnZvIhYN0uZecA11TVWuCatg5wGrC2fTYD74chGIDzgJOAE4HzdoZDq7N5ZL91\n07QhSZqdJcOkqj4P7NileD1waVu+FDhjpPyyGlwPHJHkGOBVwNaq2lFVDwBbgXVt2+FVdV1VFXDZ\nLseapA1J0oxMe83k6Kq6D6D9fHorXwXcM1JvoZXtqXxhkfJp2pAkzcjevgCfRcpqivJp2vjxisnm\nJPNJ5rdv377EYSVJ05o2TL6589RS+3l/K18Ajh2ptxq4d4ny1YuUT9PGj6mqi6pqrqrmVq5cOdEA\nJUnjmzZMtgA778jaCFw1Un5Wu+PqZOChdorqauDUJEe2C++nAle3bd9NcnK7i+usXY41SRuSpBlZ\nsVSFJB8BXgYclWSB4a6s84Erk2wC7gZe26p/Cjgd2AY8DLwRoKp2JHkXcGOr986q2nlR/2yGO8YO\nAz7dPkzahiRpdpYMk6o6czebTlmkbgFv2c1xLgEuWaR8Hnj+IuXfnrQNSdJs+A14SVI3w0SS1M0w\nkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS1M0w\nkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUjfDRJLUzTCRJHUzTCRJ3QwTSVI3w0SS1M0w\nkSR1M0wkSd0ME0lSN8NEktTNMJEkdTNMJEndDBNJUrcDNkySrEvy1STbkpwz6/5I0sFsxaw7MI0k\nhwDvBf45sADcmGRLVd0+254NLr/h7t1ue91Jxy1jTyRpeRyoM5MTgW1V9bWq+gFwBbB+xn2SpIPW\nATkzAVYB94ysLwAnzagvE9ndrMUZi6QD2YEaJlmkrH6sUrIZ2NxWv5fkq1O2dxTwrSn3Hcvr9+XB\nH7PPx7FMHMf+xXHsX/b2OJ4xTqUDNUwWgGNH1lcD9+5aqaouAi7qbSzJfFXN9R5n1hzH/sVx7F8c\nR58D9ZrJjcDaJMcnORTYAGyZcZ8k6aB1QM5MquqRJG8FrgYOAS6pqttm3C1JOmgdkGECUFWfAj61\nTM11nyrbTziO/Yvj2L84jg6p+rHr1pIkTeRAvWYiSdqPGCYjlnpES5InJPlo235DkjXL38uljTGO\nf5vk9iS3JLkmyVi3/i23cR+Zk+Q1SSrJfnknzjjjSPLL7b/JbUkuX+4+jmOM36vjklyb5Ob2u3X6\nLPq5J0kuSXJ/klt3sz1JLmxjvCXJi5e7j+MYYxyvb/2/JckXkrxgn3eqqvwMp/oOAf4W+BngUODL\nwAm71PnXwAfa8gbgo7Pu95TjeDnwU2357AN1HK3ek4HPA9cDc7Pu95T/PdYCNwNHtvWnz7rfU47j\nIuDstnwC8PVZ93uRcfwC8GLg1t1sPx34NMN32U4Gbph1n6ccx8+N/D6dthzjcGbymHEe0bIeuLQt\nfww4JcliX6CcpSXHUVXXVtXDbfV6hu/p7G/GfWTOu4D/BPz9cnZuAuOM418C762qBwCq6v5l7uM4\nxhlHAYe35aewyHe/Zq2qPg/s2EOV9cBlNbgeOCLJMcvTu/EtNY6q+sLO3yeW6d+4YfKYxR7Rsmp3\ndarqEeAh4GnL0rvxjTOOUZsY/hLb3yw5jiQvAo6tqk8uZ8cmNM5/j2cBz0ryv5Jcn2TdsvVufOOM\n43eAX0mywHCn5a8vT9f2qkn//RwIluXf+AF7a/A+MM4jWsZ6jMuM
jd3HJL8CzAG/uE97NJ09jiPJ\n44ALgDcsV4emNM5/jxUMp7pexvAX5F8leX5VPbiP+zaJccZxJvChqvrDJP8U+HAbxz/s++7tNQfC\nv/GxJXk5Q5j8/L5uy5nJY8Z5RMsP6yRZwTCV39OUeRbGetRMklcCvwW8uqq+v0x9m8RS43gy8Hzg\nc0m+znB+e8t+eBF+3N+rq6rq/1XVXcBXGcJlfzLOODYBVwJU1XXAExmeE3UgGevfz4EgyT8G/hRY\nX1Xf3tftGSaPGecRLVuAjW35NcBnq13h2o8sOY52eui/MATJ/nh+HpYYR1U9VFVHVdWaqlrDcF74\n1VU1P5vu7tY4v1f/jeGmCJIcxXDa62vL2suljTOOu4FTAJI8lyFMti9rL/ttAc5qd3WdDDxUVffN\nulOTSnIc8HHgV6vqb5ajTU9zNbWbR7QkeScwX1VbgIsZpu7bGGYkG2bX48WNOY7/DDwJ+PN2/8Dd\nVfXqmXV6EWOOY7835jiuBk5NcjvwKPDvluMvyUmMOY63Ax9M8hsMp4besL/9sZXkIwynE49q13bO\nAx4PUFUfYLjWczqwDXgYeONserpnY4zjPzBcz31f+zf+SO3jhz/6DXhJUjdPc0mSuhkmkqRuhokk\nqZthIknqZphI0k+gpR4GuUvdC5J8qX3+JsnEX5j1bi7pIJekgLVVtW3WfdHek+QXgO8xPGvs+RPs\n9+vAi6rqTZO058xEB50kP98ey/1Qkh3tmVj/ZAb9qCTPXOY2P5fk15azTc3GYg+DTPKzSf4yyU1J\n/irJcxbZ9UzgI5O255cWdVBJcjjwSYZH71/J8Dj1fwbsj4+Ukfa2i4A3V9WdSU4C3ge8YufGDO82\nOh747KQHdmaig82zAKrqI1X1aFX9XVV9pqpugeEBkkl+O8k32vnmy5I8pW1b02YTG5PcneRbSX5r\n54GTHJbk0iQPJLkjyW+2bydPLMmb2jEeSHJ1Rl5g1vrw5iR3tu3v3fkqhCSHJPnD1re7kry11V+R\n5PcZgvNPknwvyZ+MNPnKxY6nnxxJnsTwnpM/T/Ilhkcq7fp4/Q3Ax6rq0Ykb2NcvTPHjZ3/6MLxv\n49sM76U5jfYCoZHtb2J4lMbPMDxy5uPAh9u2NQyPCfkgcBjwAoYZzXPb9vOB/wEcyfCAwFuAhT30\npYBnLlJ+RuvDcxnOHvw28IVd9vskcARwHMPzr9a1bW8Gbm/tHwn891Z/Rdv+OeDXFunHosfzc2B/\n2u/srW35cOC+JerfDPzcNG05M9FBpaq+w/A47p2hsD3JliRHtyqvB/6ohpdAfQ84F9jQnhK90+/W\nMKP5MsMbB3e+EvWXgT+oqgeqagG4cMpu/ivgP1bVHTW8N+cPgBfmR1+vfH5VPVhVdwPXAi8c6cN7\nqmqhhpcjnT9mm7s7nn5CtN/9u5K8Fn74iuIfvs43ybMZ/gC5bprjGyY66LT/Sb+hqlYzPMb+p4E/\nbpt/GvjGSPVvMMwOjh4p+z8jyw8zzGB27jv6YqXR5Uk8A3hPkgfbLZo7GN6zMfqSpr3dh90dTweo\n9jDI64BnJ1lIsonhj6VNSb4M3MaPvi3zTOCKalOUSXkBXge1qvrrJB9imA3A8O6K0RnAccAjwDdZ\n+tWn97U6t7f1Y/dQd0/uAX6/qv5sin139mGnXfvgdwEOElV15m42Lfomz6r6nZ72nJnooJLkOUne\nnmR1Wz+W4S+y61uVjwC/keG9HU9iOMX00Xa6aSlXAucmOTLJKuCtY+xzaJInjnwOAT7QjvO81sen\n7Dw1MWYf3pZkVZIjgHfssv2bDNeDpL3KMNHB5rvAScANSf4vQ4jcyvAuDoBLgA8DnwfuAv6e8d9l\n/k6GN/XdxXDh+2MsfcvxbcDfjXzeWFWfAN4NXJHkO61/p43Zhw8Cn2G4+H8zw/s5HmF4TwrAe4DX\ntLu2pr2mI/0YvwEv7SNJzgY2
VNUvzrAPpwEfqKpnLFlZ6uDMRNpLkhyT5KXtuyrPZpjtfGKZ+3BY\nktPb90pWMbyBb1n7oIOTMxNpL2m37v4FwzeIHwSuAM6tqh8sYx9+iuG7Ls9hOG32F8Db2m2h0j5j\nmEiSunmaS5LUzTCRJHUzTCRJ3QwTSVI3w0SS1M0wkSR1+/8nQ/TrwCEFHAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x2448956f0f0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_train_merged['song_length']=df_train_merged['song_length'].replace(np.NaN,100)\n",
    "fig = plt.figure()\n",
    "sns.distplot(df_train_merged['song_length'],kde=False)\n",
    "plt.xlabel(\"Song Length\", fontsize=12)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\scipy\\stats\\stats.py:1633: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n",
      "  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAENCAYAAADKcIhSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAG35JREFUeJzt3X+0XWV95/H3p4ko1mqCXBiaBEM1\n/kBWTfGWZMZ2RsFCYLoMnYWzok5JbWZiXdCxXU4LtLNKq9bBmWmprCouLCnBpcYsqkOWxcYM0joz\n1cBFEIhoc0c0XGEgNAGxdnDA7/xxnrSHy725P/ZNDjd5v9Y66+z93c/ez7P9cT/Z++xznlQVkiR1\n8SODHoAkaf4zTCRJnRkmkqTODBNJUmeGiSSpM8NEktSZYSJJ6swwkSR1ZphIkjpbOOgBHC7HH398\nLV++fNDDkKR55fbbb3+kqoamanfUhMny5csZGRkZ9DAkaV5J8u3ptPM2lySpM8NEktSZYSJJ6sww\nkSR1ZphIkjozTCRJnRkmkqTODBNJUmeGiSSps6PmG/CSDp1P7NwzYf2tq04+zCPRoHhlIknqbNph\nkmRBkjuSfLatn5JkZ5LdST6V5JhWf25bH23bl/cd47JW/0aSc/rqa1ptNMmlffUZ9yFJOvxmcmXy\nLuDevvUPAFdW1QpgP7Ch1TcA+6vqZcCVrR1JTgXWAa8G1gAfbgG1APgQcC5wKvCW1nbGfUiSBmNa\nYZJkKfAvgT9p6wHOBG5oTTYD57fltW2dtv2s1n4tsKWqnqiq+4BR4Iz2Gq2qb1bVD4AtwNpZ9iFJ\nGoDpXpn8EfCbwA/b+ouBR6vqybY+Bixpy0uA+wHa9sda+3+oj9tnsvps+pAkDcCUYZLk54GHq+r2\n/vIETWuKbXNVn6r/f5BkY5KRJCN79+6dYBdJ0lyYzpXJ64A3JfkWvVtQZ9K7UlmU5MCjxUuBB9ry\nGLAMoG1/EbCvvz5un8nqj8yij6epqmuqariqhoeGppwoTJI0S1OGSVVdVlVLq2o5vQ/Qv1BVbwNu\nAS5ozdYDN7blbW2dtv0LVVWtvq49iXUKsAK4FbgNWNGe3Dqm9bGt7TPTPiRJA9DlS4uXAFuSvA+4\nA7i21a8FPpZklN7VwjqAqtqVZCvwNeBJ4KKqegogycXAdmABsKmqds2mD0nSYORo+Qf98PBwOQe8\ndGj4DfgjV5Lbq2p4qnZ+A16S1JlhIknqzDCRJHVmmEiSOjNMJEmdGSaSpM4ME0lSZ4aJJKkzw0SS\n1JlhIknqzDCRJHVmmEiSOjNMJEmdGSaSpM4ME0lSZ4aJJKmzKcMkyfOS3Jrkq0l2Jfm9Vr8uyX1J\n7myvla2eJFclGU1yV5LT+461Psnu9lrfV39tkrvbPlclSasfl2RHa78jyeKp+pAkHX7TuTJ5Ajiz\nql4DrATWJFndtv1GVa1srztb7Vx687uvADYCV0MvGIDLgVXAGcDlB8KhtdnYt9+aVr8UuLmqVgA3\nt/VJ+5AkDcaUYVI932urz2mvg831uxa4vu33ZWBRkpOAc4AdVbWvqvYDO+gF00nAC6vqS9WbQ/h6\n4Py+Y21uy5vH1SfqQ5I0ANP6zCTJgiR3Ag/TC4SdbdPvt9tMVyZ5bqstAe7v232s1Q5WH5ugDnBi\nVT0I0N5PmKIPSdIATCtMquqpqloJLAXOSHIacBnwSuCngeOAS1rzTHSIWdQPZlr7JNmYZCTJyN69\ne6c4pCRptmb0NFdVPQr8JbCmqh5st5meAP6U3ucg0LtKWNa321LggSnqSyeoAzx04PZVe394ij7G\nj/eaqhququGhoaGZnKokaQam8zTXUJJFbflY4I3A1/v+yIfeZxn3tF22ARe2J65WA4+1W1TbgbOT\nLG4fvJ8NbG/bHk+yuh3rQuDGvmMdeO
pr/bj6RH1IkgZg4TTanARsTrKAXvhsrarPJvlCkiF6t5zu\nBH6ltb8JOA8YBb4PvB2gqvYleS9wW2v3nqra15bfCVwHHAt8rr0ArgC2JtkA7AHefLA+JEmDkd4D\nVEe+4eHhGhkZGfQwpCPSJ3bumbD+1lUnH+aRaK4lub2qhqdq5zfgJUmdGSaSpM4ME0lSZ4aJJKkz\nw0SS1JlhIknqzDCRJHVmmEiSOjNMJEmdGSaSpM4ME0lSZ4aJJKkzw0SS1JlhIknqzDCRJHVmmEiS\nOjNMJEmdTWcO+OcluTXJV5PsSvJ7rX5Kkp1Jdif5VJJjWv25bX20bV/ed6zLWv0bSc7pq69ptdEk\nl/bVZ9yHJOnwm86VyRPAmVX1GmAlsCbJauADwJVVtQLYD2xo7TcA+6vqZcCVrR1JTgXWAa8G1gAf\nTrKgzS3/IeBc4FTgLa0tM+1DkjQYU4ZJ9XyvrT6nvQo4E7ih1TcD57fltW2dtv2sJGn1LVX1RFXd\nB4wCZ7TXaFV9s6p+AGwB1rZ9ZtqHJGkApvWZSbuCuBN4GNgB/G/g0ap6sjUZA5a05SXA/QBt+2PA\ni/vr4/aZrP7iWfQxftwbk4wkGdm7d+90TlWSNAvTCpOqeqqqVgJL6V1JvGqiZu19oiuEmsP6wfp4\neqHqmqoarqrhoaGhCXaRJM2FGT3NVVWPAn8JrAYWJVnYNi0FHmjLY8AygLb9RcC+/vq4fSarPzKL\nPiRJAzCdp7mGkixqy8cCbwTuBW4BLmjN1gM3tuVtbZ22/QtVVa2+rj2JdQqwArgVuA1Y0Z7cOobe\nh/Tb2j4z7UOSNAALp27CScDm9tTVjwBbq+qzSb4GbEnyPuAO4NrW/lrgY0lG6V0trAOoql1JtgJf\nA54ELqqqpwCSXAxsBxYAm6pqVzvWJTPpQ5I0GDla/kE/PDxcIyMjgx6GdET6xM49E9bfuurkwzwS\nzbUkt1fV8FTt/Aa8JKkzw0SS1JlhIknqzDCRJHVmmEiSOpvOo8GSBEz+1JbklYkkqTPDRJLUmWEi\nSerMMJEkdWaYSJI6M0wkSZ0ZJpKkzgwTSVJnhokkqTPDRJLU2XSm7V2W5JYk9ybZleRdrf67Sb6T\n5M72Oq9vn8uSjCb5RpJz+uprWm00yaV99VOS7EyyO8mn2vS9tCl+P9Xa70yyfKo+JEmH33SuTJ4E\n3l1VrwJWAxclObVtu7KqVrbXTQBt2zrg1cAa4MNJFrRpfz8EnAucCryl7zgfaMdaAewHNrT6BmB/\nVb0MuLK1m7SPWf+nIEnqZMowqaoHq+orbflx4F5gyUF2WQtsqaonquo+YBQ4o71Gq+qbVfUDYAuw\nNkmAM4Eb2v6bgfP7jrW5Ld8AnNXaT9aHJGkAZvSZSbvN9FPAzla6OMldSTYlWdxqS4D7+3Yba7XJ\n6i8GHq2qJ8fVn3astv2x1n6yY40f78YkI0lG9u7dO5NTlSTNwLTDJMkLgD8Dfq2qvgtcDbwUWAk8\nCPzBgaYT7F6zqM/mWE8vVF1TVcNVNTw0NDTBLpKkuTCtMEnyHHpB8vGq+jRAVT1UVU9V1Q+Bj/KP\nt5nGgGV9uy8FHjhI/RFgUZKF4+pPO1bb/iJg30GOJUkagOk8zRXgWuDeqvrDvvpJfc1+AbinLW8D\n1rUnsU4BVgC3ArcBK9qTW8fQ+wB9W1UVcAtwQdt/PXBj37HWt+ULgC+09pP1IUkagOnMtPg64BeB\nu5Pc2Wq/Re9prJX0bi99C3gHQFXtSrIV+Bq9J8EuqqqnAJJcDGwHFgCbqmpXO94lwJYk7wPuoBde\ntPePJRmld0Wybqo+JEmHX3r/0D/yDQ8P18jIyKCHIc1rM522962rTj5EI9HhkuT2qhqeqp3fgJck\ndWaYSJI6M0wkSZ0ZJpKkzgwTSVJnhokkqTPDRJLUmWEiSerMMJEkdWaYSJI6M0wkSZ0ZJpKkzgwT\nSV
JnhokkqTPDRJLUmWEiSepsOtP2LktyS5J7k+xK8q5WPy7JjiS72/viVk+Sq5KMJrkryel9x1rf\n2u9Osr6v/tokd7d9rmpTBc+qD0nS4TedK5MngXdX1auA1cBFSU4FLgVurqoVwM1tHeBcenOyrwA2\nAldDLxiAy4FVwBnA5QfCobXZ2LffmlafUR+SpMGYMkyq6sGq+kpbfhy4F1gCrAU2t2abgfPb8lrg\n+ur5MrAoyUnAOcCOqtpXVfuBHcCatu2FVfWl6s0hfP24Y82kD0nSAMzoM5Mky4GfAnYCJ1bVg9AL\nHOCE1mwJcH/fbmOtdrD62AR1ZtHH+PFuTDKSZGTv3r0zOVVJ0gxMO0ySvAD4M+DXquq7B2s6Qa1m\nUT/ocKazT1VdU1XDVTU8NDQ0xSElSbM1rTBJ8hx6QfLxqvp0Kz904NZSe3+41ceAZX27LwUemKK+\ndIL6bPqQJA3AdJ7mCnAtcG9V/WHfpm3AgSey1gM39tUvbE9crQYea7eotgNnJ1ncPng/G9jetj2e\nZHXr68Jxx5pJH5KkAVg4jTavA34RuDvJna32W8AVwNYkG4A9wJvbtpuA84BR4PvA2wGqal+S9wK3\ntXbvqap9bfmdwHXAscDn2ouZ9iFJGowpw6Sq/icTf0YBcNYE7Qu4aJJjbQI2TVAfAU6boP63M+1D\nknT4+Q14SVJnhokkqTPDRJLUmWEiSerMMJEkdWaYSJI6M0wkSZ0ZJpKkzgwTSVJnhokkqTPDRJLU\nmWEiSerMMJEkdWaYSJI6M0wkSZ0ZJpKkzqYzbe+mJA8nuaev9rtJvpPkzvY6r2/bZUlGk3wjyTl9\n9TWtNprk0r76KUl2Jtmd5FNJjmn157b10bZ9+VR9SJIGYzpXJtcBayaoX1lVK9vrJoAkpwLrgFe3\nfT6cZEGSBcCHgHOBU4G3tLYAH2jHWgHsBza0+gZgf1W9DLiytZu0j5mdtiRpLk0ZJlX1RWDfVO2a\ntcCWqnqiqu6jN0f7Ge01WlXfrKofAFuAtUkCnAnc0PbfDJzfd6zNbfkG4KzWfrI+JEkD0uUzk4uT\n3NVugy1utSXA/X1txlptsvqLgUer6slx9acdq21/rLWf7FjPkGRjkpEkI3v37p3dWUqSpjTbMLka\neCmwEngQ+INWzwRtaxb12RzrmcWqa6pquKqGh4aGJmoiSZoDswqTqnqoqp6qqh8CH+UfbzONAcv6\nmi4FHjhI/RFgUZKF4+pPO1bb/iJ6t9smO5YkaUBmFSZJTupb/QXgwJNe24B17UmsU4AVwK3AbcCK\n9uTWMfQ+QN9WVQXcAlzQ9l8P3Nh3rPVt+QLgC639ZH1IkgZk4VQNknwSeD1wfJIx4HLg9UlW0ru9\n9C3gHQBVtSvJVuBrwJPARVX1VDvOxcB2YAGwqap2tS4uAbYkeR9wB3Btq18LfCzJKL0rknVT9SFJ\nGoz0/rF/5BseHq6RkZFBD0Oa1z6xc8+M2r911cmHaCQ6XJLcXlXDU7XzG/CSpM4ME0lSZ4aJJKkz\nw0SS1JlhIknqzDCRJHVmmEiSOjNMJEmdGSaSpM4ME0lSZ4aJJKkzw0SS1NmUvxosSbM12Q9D+gOQ\nRx6vTCRJnRkmkqTODBNJUmdThkmSTUkeTnJPX+24JDuS7G7vi1s9Sa5KMprkriSn9+2zvrXfnWR9\nX/21Se5u+1yVJLPtQ5I0GNO5MrkOWDOudilwc1WtAG5u6wDn0puTfQWwEbgaesFAb7rfVcAZwOUH\nwqG12di335rZ9CFJGpwpw6SqvkhvDvZ+a4HNbXkzcH5f/frq+TKwKMlJwDnAjqraV1X7gR3Amrbt\nhVX1perNH3z9uGPNpA9J0oDM9jOTE6vqQYD2fkKrLwHu72s31moHq49NUJ9NH5KkAZnrD+AzQa1m\nUZ9NH89smGxMMpJkZO/evVMcVpI0W7MNk4cO3Fpq7w+3+hiwrK/d
UuCBKepLJ6jPpo9nqKprqmq4\nqoaHhoZmdIKSpOmbbZhsAw48kbUeuLGvfmF74mo18Fi7RbUdODvJ4vbB+9nA9rbt8SSr21NcF447\n1kz6kCQNyJQ/p5Lkk8DrgeOTjNF7KusKYGuSDcAe4M2t+U3AecAo8H3g7QBVtS/Je4HbWrv3VNWB\nD/XfSe+JsWOBz7UXM+1DkjQ4U4ZJVb1lkk1nTdC2gIsmOc4mYNME9RHgtAnqfzvTPiRJg+E34CVJ\nnRkmkqTODBNJUmeGiSSpM8NEktSZYSJJ6swwkSR1ZphIkjozTCRJnRkmkqTODBNJUmeGiSSpM8NE\nktSZYSJJ6swwkSR1ZphIkjrrFCZJvpXk7iR3JhlpteOS7Eiyu70vbvUkuSrJaJK7kpzed5z1rf3u\nJOv76q9txx9t++ZgfUiSBmPKmRan4Q1V9Ujf+qXAzVV1RZJL2/olwLnAivZaBVwNrEpyHL2pgIeB\nAm5Psq2q9rc2G4Ev05uudw29aX0n60PSHPjEzj2DHoLmmUNxm2stsLktbwbO76tfXz1fBhYlOQk4\nB9hRVftagOwA1rRtL6yqL7Wpeq8fd6yJ+pAkDUDXMCng80luT7Kx1U6sqgcB2vsJrb4EuL9v37FW\nO1h9bIL6wfqQJA1A19tcr6uqB5KcAOxI8vWDtM0EtZpFfdpawG0EOPnkk2eyqyRpBjpdmVTVA+39\nYeAzwBnAQ+0WFe394dZ8DFjWt/tS4IEp6ksnqHOQPsaP75qqGq6q4aGhodmepiRpCrMOkyQ/muTH\nDiwDZwP3ANuAA09krQdubMvbgAvbU12rgcfaLartwNlJFrenss4GtrdtjydZ3Z7iunDcsSbqQ5I0\nAF1uc50IfKY9rbsQ+ERV/UWS24CtSTYAe4A3t/Y3AecBo8D3gbcDVNW+JO8Fbmvt3lNV+9ryO4Hr\ngGPpPcX1uVa/YpI+JEkDMOswqapvAq+ZoP63wFkT1Au4aJJjbQI2TVAfAU6bbh+SpMHwG/CSpM4M\nE0lSZ4aJJKkzw0SS1JlhIknqzDCRJHVmmEiSOjNMJEmdGSaSpM4ME0lSZ3Mx06IkzcjBZnJ86yqn\ni5iPDBPpKOb0vJor3uaSJHVmmEiSOjNMJEmdGSaSpM7m9QfwSdYAHwQWAH9SVVcMeEjSs5IftOtQ\nm7dhkmQB8CHg54Ax4LYk26rqa4MdmaQuJgs+Hxl+dpu3YQKcAYy26YNJsgVYCxgmOuJ5paFnm/kc\nJkuA+/vWx4BVAxqLNC2GwOzN1X92XuEcGvM5TDJBrZ7WINkIbGyr30vyjUM+qsPjeOCRQQ9iDh1p\n5wNH3jkdMefztt7bEXM+zaE8n5dMp9F8DpMxYFnf+lLggf4GVXUNcM3hHNThkGSkqoYHPY65cqSd\nDxx55+T5PLs9G85nPj8afBuwIskpSY4B1gHbBjwmSToqzdsrk6p6MsnFwHZ6jwZvqqpdAx6WJB2V\n5m2YAFTVTcBNgx7HABxpt+6OtPOBI++cPJ9nt4GfT6pq6laSJB3EfP7MRJL0LGGYzDNJFiW5IcnX\nk9yb5J8OekxdJPn1JLuS3JPkk0meN+gxzUSSTUkeTnJPX+24JDuS7G7viwc5xpmY5Hz+S/vf211J\nPpNk0SDHOBMTnU/ftv+QpJIcP4ixzcZk55PkV5N8o/1/6T8PYmyGyfzzQeAvquqVwGuAewc8nllL\nsgT498BwVZ1G70GKdYMd1YxdB6wZV7sUuLmqVgA3t/X54jqeeT47gNOq6ieBvwEuO9yD6uA6nnk+\nJFlG76eY5tu3SK9j3PkkeQO9X//4yap6NfBfBzAuw2Q+SfJC4J8D1wJU1Q+q6tHBjqqzhcCxSRYC\nz2fcd4We7arqi8C+ceW1wOa2vBk4/7AOqoOJzqeqPl9VT7bVL9P7Tte8MMl/PwBXAr/JuC86P9tN\ncj7vBK6oqidam4cP+8AwTOab
nwD2An+a5I4kf5LkRwc9qNmqqu/Q+1fUHuBB4LGq+vxgRzUnTqyq\nBwHa+wkDHs9c+mXgc4MeRBdJ3gR8p6q+OuixzJGXAz+bZGeSv0ry04MYhGEyvywETgeurqqfAv6O\n+XUL5WnaZwlrgVOAHwd+NMm/GeyoNJkkvw08CXx80GOZrSTPB34b+J1Bj2UOLQQWA6uB3wC2Jpno\n56YOKcNkfhkDxqpqZ1u/gV64zFdvBO6rqr1V9f+ATwP/bMBjmgsPJTkJoL0P5LbDXEqyHvh54G01\nv79P8FJ6/3j5apJv0btl95Uk/2Sgo+pmDPh09dwK/JDeb3UdVobJPFJV/we4P8krWuks5vdP7u8B\nVid5fvuX1FnM4wcK+mwD1rfl9cCNAxxLZ20SukuAN1XV9wc9ni6q6u6qOqGqllfVcnp/iE9v/9+a\nr/4bcCZAkpcDxzCAH7E0TOafXwU+nuQuYCXw/gGPZ9baFdYNwFeAu+n973Hg3+SdiSSfBL4EvCLJ\nWJINwBXAzyXZTe+JoXkzA+gk5/PHwI8BO5LcmeQjAx3kDExyPvPWJOezCfiJ9rjwFmD9IK4e/Qa8\nJKkzr0wkSZ0ZJpKkzgwTSVJnhokkqTPDRJLUmWEiHeXaL+e+bNDj0PxmmOiok+Rnkvx1kseS7Evy\nvwbxe0aD+COe5C+T/NvD2aeODvN62l5pptovL3+W3i+tbqX3beGfBZ4Y5Lik+c4rEx1tXg5QVZ+s\nqqeq6u/bT6zfBZDkR5L8xyTfbpMQXZ/kRW3b8nY1sT7JniSPtB8/pG0/NsnmJPvbxGW/mWRsNoNM\n8svtGPuTbE/ykr5tleRX2uRb+5N86MAP+yVZkOQP2tjuS3Jxa78wye/TC84/TvK9JH/c1+UbJzqe\nNF2GiY42fwM81f7on5tnzoL4S+31Bno/+f8Cej8n0u9ngFfQ+y2x30nyqla/HFje9vs5YFa/gJzk\nfOC3gH8FDAH/A/jkuGY/D/w0vQnS/jVwTqv/O+Bcej+1czp9c6lU1W+3Y11cVS+oqouncTxpWgwT\nHVWq6rv0wqCAjwJ7k2xLcmJr8jbgD6vqm1X1PXqzCq5rk3cd8HvtiuarwFfp/QGG3h/h91fV/qoa\nA66a5TDfAfynqrq3TUr1fmBl/9UJvcmQHq2qPcAt9MLjwBg+WFVjVbWf6f8u2GTHk6bFMNFRp/2R\n/qWqWgqcRm8ulT9qm38c+HZf82/T+2zxxL5a/y/Mfp/e1cuBfe/v29a/PBMvAT6Y5NEkj9KbWS/A\nkkM4hsmOJ02LYaKjWlV9nd682qe10gP0/pgfcDK9CaEemsbhHuTpU9oum+Ww7gfeUVWL+l7HVtVf\nz8EY/GVXHRKGiY4qSV6Z5N1Jlrb1ZcBb6M1tDr3PJn49ySlJXkDvFtOn+uZAP5itwGVJFidZAlw8\n1Q7AMUme1/daAHykHefVbYwvSvLmaZ7iVuBdSZYkWURvHpJ+D9H7TEeaU4aJjjaPA6uAnUn+jl6I\n3AO8u23fBHwM+CJwH/B/6c0hMx3voTfZ0n3Af6c3V8tUjxzvAv6+7/X2qvoM8AFgS5LvtvGdO80x\nfBT4PHAXcAdwE70rq6fa9g8CF7Sntmb7mY70DM5nIh0iSd4JrKuqfzHAMZwLfKSqXjJlY6kDr0yk\nOZLkpCSva99VeQW9q53PHOYxHJvkvPa9kiX0Hlc+rGPQ0ckrE2mOtEd3/xw4BXiU3hSql1XVDw7j\nGJ4P/BXwSnq3zf4ceFd7JFo6ZAwTSVJn3uaSJHVmmEiSOjNMJEmdGSaSpM4ME0lSZ4aJJKmz/w8L\nw9EAKcAZpgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x244742e89b0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# song_length spans a very wide range, so look at its distribution after a log1p transform.\n",
    "X_train_log = np.log1p(df_train_merged['song_length'])\n",
    "fig, ax = plt.subplots()\n",
    "sns.distplot(X_train_log, kde=False, ax=ax)\n",
    "# The histogram shows the transformed values, so the axis label names the transform.\n",
    "ax.set_xlabel('log1p(song_length)', fontsize=12)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Drop rows with extreme song_length values: keep only rows strictly between the two cut-offs.\n",
    "ulimit = 300000   # exclusive upper cut-off for song_length\n",
    "ulimit1 = 150000  # exclusive lower cut-off for song_length\n",
    "df_train_merged = df_train_merged[\n",
    "    (df_train_merged['song_length'] > ulimit1)\n",
    "    & (df_train_merged['song_length'] < ulimit)\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "source_system_tab、source_screen_name、source_type 为类别型特征，可用 value_counts 统计各取值的频数，或用 countplot 绘制频数分布图"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    7.812943e+06\n",
       "mean     2.351859e+05\n",
       "std      3.428453e+04\n",
       "min      1.500010e+05\n",
       "25%      2.104950e+05\n",
       "50%      2.354150e+05\n",
       "75%      2.621640e+05\n",
       "max      2.999990e+05\n",
       "Name: song_length, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics of the song_length column of df_train_merged.\n",
    "df_train_merged.loc[:, 'song_length'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaEAAAELCAYAAABwLzlKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xv8XdOd//HXW1zHLUH4hUjjkk5H\nqbRCU1r3IXQqaiimJVWdlHGdqf7Qdlzrh9HWjF60QSpabapUhTKRhmDcg8jFpfkWJZUfIXEvfvj8\n/ljryM7JOee7v998v98dOe/n43EeZ+911l5rnXP2OZ+z915nLUUEZmZmVVip6gaYmVn7chAyM7PK\nOAiZmVllHITMzKwyDkJmZlYZByEzM6uMg5CZmVXGQcjMzCrjIGRmZpVZueoGLO822GCDGDp0aNXN\nMDP7QHnggQdeiIiBneVzEOrE0KFDmT59etXNMDP7QJH05zL5fDrOzMwq4yBkZmaVcRAyM7PKOAiZ\nmVllHITMzKwyDkJmZlYZByEzM6uMg5CZmVXGQcjMzCrjEROsV+z0g50qqffO4+6spF4z6x4fCZmZ\nWWUchMzMrDIOQmZmVhkHITMzq4yDkJmZVabXgpCk1SXdJ+lhSXMknZnTL5f0pKQZ+TY8p0vSRZI6\nJM2U9IlCWWMkzc23MYX07STNyttcJEk5fT1JU3L+KZIGdFaHmZn1vd48EnoL2D0itgWGA6MkjcyP\nfSMihufbjJy2DzAs38YCF0MKKMDpwCeBHYDTa0El5xlb2G5UTj8FmBoRw4Cpeb1pHWZmVo1eC0KR\nvJZXV8m3aLHJaOCKvN09QH9Jg4C9gSkRsTAiFgFTSAFtELBORNwdEQFcAexfKGtCXp5Ql96oDjMz\nq0CvXhOS1E/SDOB5UiC5Nz90Tj4ddqGk1XLaJsAzhc3n5bRW6fMapANsFBHzAfL9hp3UYWZmFejV\nIBQR70bEcGAwsIOkrYFTgY8A2wPrASfn7GpURDfSWym1jaSxkqZLmr5gwYJOijQzs+7qk95xEfES\nMA0YFRHz8+mwt4Cfka7zQDoq2bSw2WDg2U7SBzdIB3iudpot3z/fSR317R0XESMiYsTAgQO7+GzN\nzKys3uwdN1BS/7y8BrAn8FghOIh0rWZ23mQScHjuwTYSeDmfSpsM7CVpQO6QsBcwOT/2qqSRuazD\ngesKZdV60Y2pS29Uh5mZVaA3BzAdBEyQ1I8U7K6KiBsk3SJpIOnU2AzgqJz/RmBfoAN4AzgCICIW\nSjobuD/nOysiFublo4HLgTWAm/IN4DzgKklHAk8DB7Wqw8zMqtFrQSgiZgIfb5C+e5P8ARzT5LHx\nwPgG6dOBrRukvwjs0ZU6zMys73nEBDMzq4yDkJmZVcZByMzMKuMgZGZmlXEQMjOzyjgImZlZZRyE\nzMysMg5CZmZWGQchMzOrjIOQmZlVxkHIzMwq4yBkZmaVcRAyM7PKOAiZmVllHITMzKwyDkJmZlYZ\nByEzM6uMg5CZmVXGQcjMzCrjIGRmZpXptSAkaXVJ90l6WNIcSWfm9M0k3StprqRfS1o1p6+W1zvy\n40MLZZ2a0x+XtHchfVRO65B0SiG9y3WYmVnf680jobeA3SNiW2A4MErSSOB84MKIGAYsAo7M+Y8E\nFkXElsCFOR+StgIOAT4KjAJ+LKmfpH7Aj4B9gK2AQ3NeulqHmZlVo9eCUCSv5dVV8i2A3YGrc/oE\nYP+8PDqvkx/fQ5Jy+sSIeCsingQ6gB3yrSMinoiIt4GJwOi8TVfrMDOzCvTqNaF8xDIDeB6YAvwJ\neCki3slZ5gGb5OVNgGcA8uMvA+sX0+u2aZa+fjfqqG/3WEnTJU1fsGBB9568mZl1qleDUES8GxHD\ngcGkI5e/a5Qt3zc6IokeTG9Vx5IJEe
MiYkREjBg4cGCDTczMrCf0Se+4iHgJmAaMBPpLWjk/NBh4\nNi/PAzYFyI+vCywsptdt0yz9hW7UYWZmFeg0CElaU9JKefnDkvaTtEqJ7QZK6p+X1wD2BB4FbgUO\nzNnGANfl5Ul5nfz4LREROf2Q3LNtM2AYcB9wPzAs94RbldR5YVLepqt1mJlZBVbuPAu3A5+RNACY\nCkwHDga+2Ml2g4AJuRfbSsBVEXGDpEeAiZK+AzwEXJbzXwb8XFIH6ejkEICImCPpKuAR4B3gmIh4\nF0DSscBkoB8wPiLm5LJO7kodZmZWjTJBSBHxhqQjgR9ExH9IeqizjSJiJvDxBulPkK4P1ae/CRzU\npKxzgHMapN8I3NgTdZiZWd8rc01Ikj5FOvL5fU4rE7zMzMxaKhOETgROBa7Np8Y2J11zMTMzWyad\nHtFExG3AbZLWzOtPAMf3dsPMzGzFV6Z33KdyZ4JH8/q2kn7c6y0zM7MVXpnTcf8J7A28CBARDwM7\n92ajzMysPZT6s2pEPFOX9G4vtMXMzNpMmV5uz0jaEYj8p9DjyafmzMzMlkWZI6GjgGNIg3/OI03L\ncExvNsrMzNpDmd5xL9D56AhmZmZdVqZ33ITaGHB5fYCk8b3bLDMzawdlTsd9LI+CDUBELKLBcDxm\nZmZdVSYIrZQHLwVA0np42B4zM+sBZYLJ94C7JNWmyz6IBoOJmpmZdVWZjglXSHoA2I00M+kBEfFI\nr7fMzMxWeGVPqz0GLKrllzQkIp7utVaZmVlb6DQISToOOB14jjRSgoAAPta7TTMzsxVdmSOhE4C/\njYgXe7sxZmbWXsr0jnsGeLm3G2JmZu2nzJHQE8A0Sb8H3qolRsT3e61VZmbWFsocCT0NTAFWBdYu\n3FqStKmkWyU9KmmOpBNy+hmS/iJpRr7tW9jmVEkdkh6XtHchfVRO65B0SiF9M0n3Spor6dd5gFUk\nrZbXO/LjQzurw8zM+l6ZLtpnAkhaMyJe70LZ7wBfj4gHJa0NPCBpSn7swoj4bjGzpK2AQ4CPAhsD\nf5D04fzwj4C/Jw2ger+kSbmb+Pm5rImSfgIcCVyc7xdFxJaSDsn5Dm5WR0R4agozswr02syqETE/\nIh7My6/m7TdpscloYGJEvBURTwIdwA751hERT0TE28BEYLQkAbsDtT/RTgD2L5Q1IS9fDeyR8zer\nw8zMKtAnM6vm02EfB+7NScdKmilpfGFIoE1InSBq5uW0ZunrAy9FxDt16UuUlR9/OedvVlZ9e8dK\nmi5p+oIFC7ryVM3MrAt6fWZVSWsB1wAnRsQrpNNlW5DmJZpPGhYI0v+Plqq6G+ndKWvJhIhxETEi\nIkYMHDiwwSZmZtYTSnXRLs6sKukkSs6sKmkVUgC6MiJ+CxARz0XEuxHxHnAJi0+HzQM2LWw+GHi2\nRfoLQH9JK9elL1FWfnxdYGGLsszMrAK9NrNqvgZzGfBosTu3pEGFbJ8HZuflScAhuWfbZsAw4D7g\nfmBY7gm3KqljwaSICOBW4MC8/RjgukJZY/LygcAtOX+zOszMrAIte8dJ6gccFhHdmVl1J+AwYJak\nGTntm8ChkoaTToM9BXwNICLmSLoKeITUs+6YWq81SccCk4F+wPiImJPLOxmYKOk7wEOkoEe+/7mk\nDtIR0CGd1WFmZn1P6QChRQZpWkTs2jfNWf6MGDEipk+fXnUzPnB2+sFOldR753F3VlKvmS1J0gMR\nMaKzfGVGTLhT0g+BXwPv/0+o1v3azMysu8oEoR3z/VmFtCD9R8fMzKzbOrsmtBJwcURc1UftMTOz\nNtKyd1zuRn1sH7XFzMzaTJku2lMknZQHJF2vduv1lpmZ2QqvzDWhr+T74n+DAti855tjZmbtpMwo\n2pv1RUPMzKz9dBqEJB3eKD0iruj55piZWTspczpu+8Ly6sAewIOAg5CZmS2TMqfjjiuuS1oX+Hmv\ntc
jMzNpGqakc6rxBGvjTzMxsmZS5JnQ9i+fcWQnYCvCfV83MbJmVuSb03cLyO8CfI2JeL7XHuuHp\ns7appN4hp82qpF4zW3GUCUJPA/Mj4k0ASWtIGhoRT/Vqy8zMbIVX5prQb4D3Cuvv5jQzM7NlUiYI\nrRwRb9dW8vKqvdckMzNrF2WC0AJJ+9VWJI0GXui9JpmZWbsoc03oKODKPLEdwDyg4SgKZmZmXVHm\nz6p/AkZKWos0Hfirvd8sMzNrB52ejpP0fyT1j4jXIuJVSQMkfafEdptKulXSo5LmSDohp68naYqk\nufl+QE6XpIskdUiaKekThbLG5PxzJY0ppG8naVbe5iJJ6m4dZmbW98pcE9onIl6qrUTEImDfEtu9\nA3w9Iv4OGAkcI2kr4BRgakQMA6bmdYB9SCMxDAPGAhdDCijA6cAngR2A02tBJecZW9huVE7vUh1m\nZlaNMkGon6TVaiuS1gBWa5EfgIiYHxEP5uVXgUeBTYDRwIScbQKwf14eDVwRyT1Af0mDgL2BKRGx\nMAfAKcCo/Ng6EXF3RARpQNViWV2pw8zMKlCmY8IvgKmSfkYavucrLP6CL0XSUODjwL3ARhExH1Kg\nkrRhzrYJ8Exhs3k5rVX6vAbpdKOO+XXtHUs6UmLIkCFdeapmZtYFZTom/IekmcCeOensiJhctoLc\noeEa4MSIeCVftmmYtVH13Uhv2Zwy20TEOGAcwIgRIzor08zMuqnsKNoPAbcB0/JyKZJWIQWgKyPi\ntzn5udopsHz/fE6fB2xa2Hww8Gwn6YMbpHenDjMzq0CZ3nFfAO4DDgS+ANwr6cAS2wm4DHg0Ir5f\neGgSUOvhNga4rpB+eO7BNhJ4OZ9SmwzslXvlDQD2Aibnx16VNDLXdXhdWV2pw8zMKlDmmtC3gO0j\n4nkASQOBPwBXd7LdTsBhwCxJM3LaN4HzgKskHUkaHPWg/NiNpF53HaQ5i44AiIiFks4G7s/5zoqI\nhXn5aOByYA3gpnyjq3WYmVk1ygShlWoBKHuREkdQEfE/NL4GA2mK8Pr8ARzTpKzxwPgG6dOBrRuk\nv9jVOszMrO+VCUL/LWky8Ku8fjDpiMLMzGyZlOkd9w1JBwCfJh3ZjIuIa3u9ZWZmtsIrcyRE7tn2\n204zmpmZdUHZLtpmZmY9zkHIzMwq0zQISZqa78/vu+aYmVk7aXVNaJCkXYD9JE2krrt1bXBSMzOz\n7moVhE4jTYEwGPh+3WMB7N5bjTIzs/bQNAhFxNXA1ZL+PSLO7sM2mZlZmyjzP6GzJe0H7JyTpkXE\nDb3bLDMzawdlBjA9FzgBeCTfTshpZmZmy6TMn1U/CwyPiPcAJE0gTedwam82zMzMVnxl/yfUv7C8\nbm80xMzM2k+ZI6FzgYck3Urqpr0zPgoyM7MeUKZjwq8kTQO2JwWhkyPi//Z2w8zMbMVXdgDT+aRZ\nSc3MzHqMx44zM7PKOAiZmVllWgYhSStJmt1XjTEzs/bSMgjl/wY9LGlIVwuWNF7S88UgJukMSX+R\nNCPf9i08dqqkDkmPS9q7kD4qp3VIOqWQvpmkeyXNlfRrSavm9NXyekd+fGhndZiZWTXKnI4bBMyR\nNFXSpNqtxHaXA6MapF8YEcPz7UYASVsBhwAfzdv8WFI/Sf2AHwH7AFsBh+a8AOfnsoYBi4Ajc/qR\nwKKI2BK4MOdrWkeJ52FmZr2kTO+4M7tTcETcXjwK6cRoYGJEvAU8KakD2CE/1hERTwDkKSVGS3qU\nNIr3P+U8E4AzgItzWWfk9KuBH0pSizru7s7zMzOzZdfpkVBE3AY8BaySl+8HlmUuoWMlzcyn6wbk\ntE2AZwp55uW0ZunrAy9FxDt16UuUlR9/OedvVpaZmVWkzACm/0w6ovhpTtoE+F0367sY2AIYDswH\nvlerpkHe6EZ6d8paiqSxkqZLmr5gwYJGWczMrAeUuSZ0DLAT8ApA
RMwFNuxOZRHxXES8mzs8XMLi\nU27zgE0LWQcDz7ZIfwHoL2nluvQlysqPrwssbFFWo3aOi4gRETFi4MCB3XmqZmZWQpkg9FZEvF1b\nyV/sDY8gOiNpUGH180Ct59wk4JDcs20zYBhwH+nU37DcE25VUseCSRERwK3AgXn7McB1hbLG5OUD\ngVty/mZ1mJlZRcp0TLhN0jeBNST9PfAvwPWdbSTpV8CuwAaS5gGnA7tKGk4KYk8BXwOIiDmSriLN\nV/QOcExEvJvLORaYDPQDxkfEnFzFycBESd8hTS1xWU6/DPh57niwkBS4WtZhZmbVKBOETiF1e55F\nCho3Apd2tlFEHNog+bIGabX85wDnNEi/MddZn/4Ei0/nFdPfBA7qSh1mZlaNMqNov5cnsruXdATz\neD69ZWZmtkw6DUKSPgv8BPgTqYfZZpK+FhE39XbjzMxsxVbmdNz3gN0iogNA0hbA7wEHITMzWyZl\nesc9XwtA2RPA873UHjMzayNNj4QkHZAX50i6EbiKdE3oIFLXaTMzs2XS6nTc5wrLzwG75OUFwICl\ns5uZmXVN0yAUEUf0ZUPMzKz9lOkdtxlwHDC0mD8i9uu9ZpmZWTso0zvud6Q/mV4PvNe7zTEzs3ZS\nJgi9GREX9XpLzMys7ZQJQv8l6XTgZuCtWmJELMucQmZmZqWC0DbAYaSZTGun4yKvm5mZdVuZIPR5\nYPPidA5mZmY9ocyICQ8D/Xu7IWZm1n7KHAltBDwm6X6WvCbkLtpmZrZMygSh03u9FWZm1pbKzCd0\nW180xMzM2k+ZERNeJfWGA1gVWAV4PSLW6c2GmZnZiq/MkdDaxXVJ+9NgWm0zM7OuKtM7bgkR8TtK\n/EdI0nhJz0uaXUhbT9IUSXPz/YCcLkkXSeqQNFPSJwrbjMn550oaU0jfTtKsvM1FktTdOszMrBqd\nBiFJBxRuB0o6j8Wn51q5HBhVl3YKMDUihgFT8zrAPsCwfBsLXJzrXo/UMeKTpKOv02tBJecZW9hu\nVHfqMDOz6pQ5Evpc4bY38CowurONIuJ2YGFd8mhgQl6eAOxfSL8iknuA/pIG5fqmRMTCiFgETAFG\n5cfWiYi7IyKAK+rK6kodZmZWkTLXhHpyXqGNImJ+Lne+pA1z+ibAM4V883Jaq/R5DdK7U8f8ZX1S\nZmbWPa2m9z6txXYREWf3YDvUqI5upHenjqUzSmNJp+wYMmRIJ8WamVl3tTod93qDG8CRwMndrO+5\n2imwfP98Tp8HbFrINxh4tpP0wQ3Su1PHUiJiXESMiIgRAwcO7NITNDOz8poGoYj4Xu0GjAPWAI4A\nJgKbd7O+SUCth9sY4LpC+uG5B9tI4OV8Sm0ysJekAblDwl7A5PzYq5JG5l5xh9eV1ZU6zMysIi2v\nCeXeaf8GfJF0kf8TuYNApyT9CtgV2EDSPFIvt/OAqyQdCTwNHJSz3wjsC3QAb5CCHRGxUNLZwP05\n31kRUevscDSpB94awE35RlfrMDOz6rS6JnQBcADpKGibiHitKwVHxKFNHtqjQd4AjmlSznhgfIP0\n6cDWDdJf7GodZmZWjVbXhL4ObAx8G3hW0iv59qqkV/qmeWZmtiJreiQUEV0eTcHMzKwrHGjMzKwy\nDkJmZlYZByEzM6uMg5CZmVXGQcjMzCrjIGRmZpVxEDIzs8o4CJmZWWUchMzMrDIOQmZmVhkHITMz\nq4yDkJmZVcZByMzMKuMgZGZmlWk5s6rZiua2nXfp8zp3uf22Pq/T7IPCR0JmZlYZByEzM6tMJUFI\n0lOSZkmaIWl6TltP0hRJc/P9gJwuSRdJ6pA0U9InCuWMyfnnShpTSN8ul9+Rt1WrOszMrBpVHgnt\nFhHDI2JEXj8FmBoRw4CpeR1gH2BYvo0FLoYUUIDTgU8COwCnF4LKxTlvbbtRndRhZmYVWJ5Ox40G\nJuTlCcD+hfQrIrkH6C9pELA3
MCUiFkbEImAKMCo/tk5E3B0RAVxRV1ajOszMrAJVBaEAbpb0gKSx\nOW2jiJgPkO83zOmbAM8Utp2X01qlz2uQ3qoOMzOrQFVdtHeKiGclbQhMkfRYi7xqkBbdSC8tB8ax\nAEOGDOnKpmZm1gWVBKGIeDbfPy/pWtI1neckDYqI+fmU2vM5+zxg08Lmg4Fnc/qudenTcvrgBvlp\nUUd9+8YB4wBGjBjRpQBm1lU//Pr1fV7nsd/7XJ/XadZIn5+Ok7SmpLVry8BewGxgElDr4TYGuC4v\nTwIOz73kRgIv51Npk4G9JA3IHRL2Aibnx16VNDL3iju8rqxGdZiZWQWqOBLaCLg295peGfhlRPy3\npPuBqyQdCTwNHJTz3wjsC3QAbwBHAETEQklnA/fnfGdFxMK8fDRwObAGcFO+AZzXpA4zM6tAnweh\niHgC2LZB+ovAHg3SAzimSVnjgfEN0qcDW5etw8zMqrE8ddE2M7M24yBkZmaVcRAyM7PKOAiZmVll\nHITMzKwyDkJmZlYZByEzM6uMg5CZmVXGQcjMzCrjIGRmZpVxEDIzs8pUNZ+QmS3HzvnSgZXU+61f\nXF1JvVYdHwmZmVllfCTURdt944pK6n3ggsMrqdfMrDf5SMjMzCrjIGRmZpVxEDIzs8o4CJmZWWUc\nhMzMrDJtGYQkjZL0uKQOSadU3R4zs3bVdkFIUj/gR8A+wFbAoZK2qrZVZmbtqe2CELAD0BERT0TE\n28BEYHTFbTIza0vtGIQ2AZ4prM/LaWZm1scUEVW3oU9JOgjYOyK+mtcPA3aIiOMKecYCY/Pq3wKP\n91D1GwAv9FBZPcVtKmd5bBMsn+1ym8pZ0dv0oYgY2Fmmdhy2Zx6waWF9MPBsMUNEjAPG9XTFkqZH\nxIieLndZuE3lLI9tguWzXW5TOW5T0o6n4+4HhknaTNKqwCHApIrbZGbWltruSCgi3pF0LDAZ6AeM\nj4g5FTfLzKwttV0QAoiIG4EbK6i6x0/x9QC3qZzlsU2wfLbLbSrHbaINOyaYmdnyox2vCZmZ2XLC\nQagPSZomqcd6nkg6Q9JJks6StGdPldtXJH1Z0g/z8lGSDs/LPfo69TVJr/VxfbtKuiEv79dqKKpa\n2yRtLKnpXNqS+kv6l55vbdP63m+3pP2Lo5j0xv5Q95q9vx92o5wvS9q4sH5pre2SDpL0qKRbJY2Q\ndFE36zhR0t8U1m+U1L+H2ttjZXeXg9ByLA8x1KmIOC0i/tDb7emMkm7tUxHxk4goPW2tpFLXM8u+\nhl1Vtv5lrKPLr2dETIqI80rkezYiDmyRpT/QZ0Gort37k4bU+iD4MvD+l3pEfDUiHsmrRwL/EhG7\nRcT0iDi+m3WcCLwfKCJi34h4qSfa28Nld4uDUEmSviTpPkkzJP1U0ockzZW0gaSVJN0haS9JQyU9\nJmmCpJmSri7+0iiUd6ikWZJmSzq/kP5aPrK5F/iUpO0k3SbpAUmTJZ2bB1/9A+mPtEi6XNKBefk8\nSY/kur+b0zaSdK2kh/Ntx5z+b7n+2ZJOzGnnF38B56Otr+flb+Tt35T0YN7ud5L+LGk+8FfgH/Lr\nMVfSwLzdSkqDxW7Q4vU9Q9JJhaQvSbor17FDIc84STcDV+TX+o7clgcLz2vX/Ovzl8AsSWfntv8+\nt/95ST9r8NoOytv/s6T7c95rau9ffp2/L+lW4HxJa+VyZuXX+x8Lz+ecvP09kjYqt5dBfk6PSvox\n8CBwmaTpkuZIOrOQb1Tez/4HOKCQXjy6/JCkqbltUyUNqatndl7+qBbv2zMlDQPOA7bIaRcU3v/7\nc54z69p7SW7jzZLWqHtO/SQ9oaS/pPck7Zwfu0PSlrV25/dwP+CCXPcWuZiDchv/KOkzLV63Jdqh\nwlGU0mf1qZKvf305w/N7OVPpszRA6TM3Argyt/X9+iSdBnwa+ImkC7TkkVfD/UbSxfXvtaTjSU
Hj\n1rzfIekp5c+S0md4jqRXJf1F6fNyrKQnJc2X9FdJLyj9JaW+vSc0K7vV+ypp+9zuu/Nza7UfdS4i\nfOvkBvwdcD2wSl7/MXA48FXgauAbwE/zY0OBAHbK6+OBk/LytLwTbAw8DQwk9VC8Bdg/5wngC3l5\nFeAuYGBePwVYSPrlsg7QAZwEXA4cCKxHGt2h1uGkf77/NXBiXu4HrAtsB8wC1gTWAuYAH8+32wrP\n/RFgCLAXqefMUOAd4DZgl1xGACNJY/D9Lm93eqHOvYBrGryuXwZ+mJfPqHudLsnLOwOzC3keANbI\n638DrJ6XhwHT8/KuwOvAZoX35AngEtIPrz/ltOJrezCpuz7A+oU2fgc4Li9fDtwA9Mvr5wP/Wcg7\noPAefi4v/wfw7S7sa0OB94CReX29wvs2DfgYsDpp6KlhgICrgBsavKbXA2Py8leA3wGvFeqpva4/\nAL6Yl1cF1ig+XngPx+X6Vsqvw84s3h+G53xXAV9q8Lz+G/go8A+k/+p9C1gNeLJBuy8HDixsOw34\nXl7eF/hDk9dtqXbkbUfktA2Apwr7SKPXrFk5M4FdctpZtfe9WH79et1ysb5m+81S73VefwrYoJD/\nqfxcap/hfwJ+xuLP8DakfXC3nP9u8me6QXubld30fQVmAzvm5fNosR+V2ed9JFTOHqQ3/H5JM/L6\n5hFxKbA2cBQpGNQ8ExF35uVfkH4RFW0PTIuIBRHxDnAl6QMN8C5wTV7+W2BrYEqu93hgUUS8ERGv\nsPSfbF8B3gQulXQA8EZO3x24GCAi3o2Il3Obro2I1yPiNeC3wGci4iFgQ6VrBtvm+p4mfQntRera\n/i5pvL0tgT8CCyLiHtIHYmiuczwpUEP6AvxZk9e2mV/l9t4OrKPF56knRcRf8/IqwCWSZgG/YclT\nOPdFxJO5jKeA+aQvsImkL/C1WPK1/TZp9AyArfMv9FnAF0lfnjW/iYh38/KepBHZyfUsyotvk76k\nIQXNoV187n/OryfAFyQ9CDyU27EV8BHSl/fcSJ/4XzQp51PAL/Pyz1l6P6y5G/impJNJQ638tUGe\n2vv/EOkI7SOkIEhuy4y83Oz53kHax3cGzs1t2Z4UkMr4bSfll21HGfXlbEH6QXdbTpvA4s9rdzTb\nbxq91618GrgWmE4Kci8BY4BBpB8yF+Z9eyhLnoIra6nXM38O146Iu3L6Lwv5y+xHS2nL/wl1g4AJ\nEXHqEonpNE3ti2st4NW8XN/vvX5dLep6s/AlJ2BORHwq13ciMKDZhpH+iLsDKUgeAhxLCkCNtGrD\n1aQjq/9F+tKu5T+X9CffGyJi69ymfUiBD9KOv3JuyzOSnpO0O/BJ0pd5VzR7DV8vpP0r8BywLenX\n+ZuFx4r5AC4CdgOGk344/CMqzkHnAAAHUklEQVSF17bO5aQj04clfZn0AW9Urhq0E+D/5eAAKWB3\n9XP2OoCkzUg/braPiEWSLicdBdGk3s403CYifql0+vezwGRJXyUdORYJODcifrpEojQUeKuQ9C7p\nSKreHaQfaxsDp5HOHuwK3F6y7bU6Wr2ejdrxDosvO6y+1BblyunpC/VL7TedvNetyiEi/ihpO+Ay\n4HO57LciYngu+yTS91NXNXo9m35vNNqPIuKWzirxkVA5U4EDJW0IIGk9SR8iHVZfSfpQXVLIP0RS\n7cvtUOB/6sq7F9gln3vtl/PcxtIeBwYWyroLOCSfe16btMO9T9JawLqR/ox7IukLt9b+o3OefpLW\nIX3495f0N5LWBD5P+qKAFHgOIQWiWg+qyaQjmtr1kU1qr0cLl5J+pV9VCKxlHZzr+TTwcj56q7cu\nMD8i3gMOI53GaOYeUkDeEPh3UmB8/7WVtIqk2hHP2sB8SavQOnjeTAr05DKa/kDopnVIAellpetK\n++T0x4DNtPh6yaFNtr+L9D5Ceh71+yEAkjYHnoiIi0hH1x
8j/aBau5BtMvCVvI+Vff+L7gV2BN6L\niDeBGcDXWLzPFdXXvSyeIp3FgLQ/d8fLwCItvhZ1GIs/r91pa6P9ptl73aqO2md487w+DPgh6XPf\nr/C9sRLp1H+jsrrU/nzU9qqkkTmptn8124865SBUQqTeLt8GbpY0E5hCOsTdHjg/Iq4E3pZ0RN7k\nUWBMzrse+VRYobz5wKnArcDDwIMRcV2Det8mfXDOl/Qw6ZTWI6QP8DUs/QFeG7gh13sb6UgB4ARg\nt3x66QHgoxHxIOkX/32kL4hL86k4Ig1jtDbwl9xWIuJm0qH3b0mn4a6m8513EukXWFdPxUH60N8F\n/ITUy6iRH5Ne53uAD7P00U/RR4D1SdfATiX9cCi+tjNIX5KQgtS9pPf5sRZlfgcYkC8GP0w60uox\nEfEw6dTMHNLpzTtz+pukUd5/r9Qx4c9NijgeOCLvD4eR9oNGDgZm51M3HwGuiIgXgTvzc7ug8P7f\nnfejMu9/8bm8RToNWjvNeEfeflaD7BOBb0h6qBBou+u7wNF5X2raMaaEMaTOEjNJX/Jn5fTLSZ0P\nZqiuQ0YLS+03zd7rbBxwU63zQE3hM3wLaRDmAaTrQ98nvda1ffvfWDxoc317G5bdiSOBcZLuJh0Z\n1X4gLrUflSnMIyb0sHx64v3TVe1MqVfShRGxVG+mCtqyEulaxkERMbfq9ph9UElaK19HRum/XYMi\notkPnE75SMh6Rd45ryEddVTdlq1IPQmnOgCZLbPP5iOp2cBnSEd23eYjITMzq4yPhMzMrDIOQmZm\nVhkHITMzq4yDkJmZVcZByGwFpz6YlkFpgM4dS+SrH6jW2pyDkFkPUB9M7bAM+mJahl1Z/Gdfs9Ic\nhKwtSVpTi6d2mC3pYEl75H/pz5I0XtJqOW9x6PwRkqbl5fqpJfpJ+q4WD9F/XM7XcMqIJu06Xoun\n4pioFtNiKE2aNjs/h9vz48s6LcNjShOzzZZ0paQ9Jd2Z27BDkzYPJY0L96+5js9I+pyke/Pr+Qct\nOZ3FtpJuyWX+c7ffRFsxlBlq2zffVrQbaQDTSwrr65KGOvlwXr+CxVNRPEUe7p40Fce0vHwGS04t\ncTTpD7or5/X1WHo6jvenjGjSrmeB1fJybSqOhtNikIa82aQub09My7BNTn+ANISMKEzT0aTdZ5Cn\n4sjrA1j8P8SvsngqhjNIQ1WtQRpG5xlg46r3B9+qu/lIyNrVLGBPpUn8PkP6An4yIv6YHy87XH9x\naok9gZ9Emp6DiFjI0tNxFKeMaGQmadKxL5ECAjSfFuNO4PJ8NFEbvLUnpmWYFWlQ2DmkUSaCJafp\nKGMwaSTlWaQRs4vTYVwXEX+NiBdI4yc2PMKy9uAgZG0pB5vapGDnkn7pN9NqOoDOpnaoTccxPN+2\niYi9WtT1WdJcM9sBD0haOSKeAYrTYtyUn8NRpKC2KTBD0voR8UvSzKR/JQWBRlN51KZlqLVpy4i4\nLD9WHL7/vcL6+9N0lPQD0kRx25BGyy6+bp1NdWJtxEHI2pKkjYE3IuIXpJGWdyRN2rVlzlIcrv8p\nFk8H8I80dzNwVK2TgqTaTLfNpoyob9NKwKYRcSvwv0kdCmrzwCw1LYakLSLi3og4DXgB2FR9My1D\nI/V1rAv8JS+Pqcs7WtLqktYndWgoO7GdrYAchKxdbQPcl0+RfYt0RHEE8Jt8Cuk90jQSAGcC/yXp\nDtLkXs1cSpq2fWYeQv+fYunpOIpTRtTrB/wi1/8QaQTyl/JjjabFuCB3gphNmlvmYfpgWoYmrgc+\nX+uYQLr285v8mr1Ql/c+4PekaR3Ojohnl7Fu+wDzAKZmHwBajqbFMOtJy/N/G8yM96fFOJquT5Fu\nttzzkZBZBST9CNipLvm/IqI7s9D2GaXZg+snMLszIo6poj32wecgZGZmlXHHBDMzq4yDkJmZVcZB\nyMzMKuMgZGZmlXEQMj
Ozyvx/HzAudAc4UhUAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x25be7e64128>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Frequency of each source_system_tab category in df_train_merged.\n",
    "# The column is passed as x= so the call does not rely on seaborn's positional-argument order.\n",
    "sns.countplot(x=df_train_merged['source_system_tab']);\n",
    "plt.xlabel('source_system_tab');\n",
    "plt.ylabel('Number of occurrences');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAasAAAELCAYAAABnDamDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xm8XWV97/HPN2EUhAQIXMpgKKYX\nEWrUqDiHQQioRCgoXAuBUlEKilXvFW0rqFhBRSoVsVjCJAURGSKgiEiEVgTClIHBHAhDJECABEhC\nAkl+94/ntzgrJ2fY53CGFfJ9v177tfd+9nqGNf7W8Oy1FBGYmZk12bChboCZmVlPHKzMzKzxHKzM\nzKzxHKzMzKzxHKzMzKzxHKzMzKzxHKzMzKzxHKzMzKzxHKzMzKzx1hnqBjTdFltsEaNHjx7qZpiZ\nrVHuuOOOpyNiVH+V52DVg9GjRzNt2rShboaZ2RpF0iP9WZ5PA5qZWeM5WJmZWeM5WJmZWeM5WJmZ\nWeM5WJmZWeM5WJmZWeM5WJmZWeM5WJmZWeM5WJmZWeP5DhZrkPvPnNinfDsde1U/t8TMbHD5yMrM\nzBrPwcrMzBrPwcrMzBrPwcrMzBrPwcrMzBrPwcrMzBrPwcrMzBrPwcrMzBrPwcrMzBrPwcrMzBpv\nwIKVpA0k3SbpHkmzJH0903eQdKuk2ZJ+Jmm9TF8/v7fl76NrZX0l0x+QtE8tfUKmtUk6oZbe6zrM\nzKy5BvLIahmwR0S8BRgLTJC0G3AqcHpEjAEWAEfl8EcBCyLijcDpORySdgYOAd4MTAB+JGm4pOHA\nmcC+wM7AoTksva3DzMyabcCCVRSL8uu6+QpgD+CyTD8f+Fh+npjfyd/3lKRMvyQilkXEHKANeGe+\n2iLioYh4CbgEmJh5eluHmZk12IBes8ojoLuBp4DrgQeBhRGxPAeZC2yTn7cBHgPI358DNq+nd8jT\nVfrmfaijY7uPljRN0rT58+f3beTNzKzfDGiwiogVETEW2JZyJPSmzgbL986OcKIf07urY9WEiLMj\nYlxEjBs1alQnWczMbDANSm/AiFgITAV2A0ZIqp6jtS3weH6eC2wHkL9vCjxbT++Qp6v0p/tQh5mZ\nNdhA9gYcJWlEft4Q2Au4D7gROCgHmwRUTwackt/J338XEZHph2RPvh2AMcBtwO3AmOz5tx6lE8aU\nzNPbOszMrMEG8knBWwPnZ6+9YcClEXG1pHuBSySdDNwFnJPDnwNcKKmNcrRzCEBEzJJ0KXAvsBw4\nNiJWAEg6DrgOGA5MjohZWdaXe1OHmZk124AFq4iYDry1k/SHKNevOqYvBQ7uoqxvAd/qJP1a4Nr+\nqMPMzJrLd7AwM7PGc7AyM7PGc7AyM7PGc7AyM7PGc7AyM7PGc7AyM7PGc7AyM7PGc7AyM7PGc7Ay\nM7PGc7AyM7PGc7AyM7PGc7AyM7PGc7AyM7PGc7AyM7PGc7AyM7PGc7AyM7PGc7AyM7PGc7AyM7PG\nc7AyM7PGc7AyM7PGc7AyM7PG6zFYSdpI0rD8/FeS9pe07sA3zczMrGjlyOomYANJ2wA3AEcC5w1k\no8zMzOpaCVaKiCXAgcC/R8QBwM49ZpK2k3SjpPskzZJ0fKafJOnPku7O1361PF+R1CbpAUn71NIn\nZFqbpBNq6TtIulXSbEk/k7Repq+f39vy99E91WFmZs3VUrCS9G7gk8A1mbZOC/mWA1+MiDcBuwHH\nSqqC3OkRMTZf12YlOwOHAG8GJgA/kjRc0nDgTGBfSpA8tFbOqVnWGGABcFSmHwUsiIg3AqfncF3W\n0cK4mJnZEGolWH0e+ApwRUTMkvSXwI09ZYqIeRFxZ35+AbgP2KabLBOBSyJiWUTMAdqAd+arLSIe\nioiXgEuAiZIE7AFclvnPBz5WK+v8/H
wZsGcO31UdZmbWYD0Gq4j4fUTsD/wwvz8UEZ/rTSV5Gu6t\nwK2ZdJyk6ZImSxqZadsAj9Wyzc20rtI3BxZGxPIO6auUlb8/l8N3VZaZmTVYK70B3y3pXsqREZLe\nIulHrVYgaWPgF8DnI+J54CxgR2AsMA84rRq0k+zRh/S+lNWxzUdLmiZp2vz58zvJYmZmg6mV04D/\nBuwDPAMQEfcAH2il8Ozi/gvgooi4PPM/GRErImIl8BPaT8PNBbarZd8WeLyb9KeBEZLW6ZC+Sln5\n+6bAs92UtYqIODsixkXEuFGjRrUyqmZmNoBa+lNwRDzWIWlFT3nyGtE5wH0R8f1a+ta1wQ4AZubn\nKcAh2ZNvB2AMcBtwOzAme/6tR+kgMSUignLt7KDMPwm4qlbWpPx8EPC7HL6rOszMrMFa6dX3mKT3\nAJHB4nPkKcEevBc4DJgh6e5M+yqlN99Yyum3h4FPA2TnjUuBeyk9CY+NiBUAko4DrgOGA5MjYlaW\n92XgEkknA3dRgiP5fqGkNsoR1SE91WFmZs2lcsDRzQDSFsAPgL0o13x+AxwfEc8MfPOG3rhx42La\ntGlD3QwA7j9zYp/y7XTsVT0PZGbWjyTdERHj+qu8Ho+sIuJpyn+szMzMhkQrvQHPlzSi9n2kpMkD\n2ywzM7N2rXSw+OuIWFh9iYgFlP9MmZmZDYpWgtWw2h93kbQZrXXMMDMz6xetBJ3TgD9Iqm5rdDDw\nrYFrkpmZ2apa6WBxgaQ7gN0pvQEPjIh7B7xlZmZmqdXTefdT7mq+DoCk7SPi0QFrlZmZWU2PwUrS\nZ4ETgScpd64Q5Q+9fz2wTTMzMytaObI6Hvjfa8ufgM3MrHla6Q34GOURG2ZmZkOilSOrh4Cpkq4B\nllWJ9ZvTmpmZDaRWgtWj+VovX2ZmZoOqla7rXweQtFFELB74JpmZma1qwJ8UbGZm9moN6JOCzczM\n+sOAPSnYzMysvwzkk4LNzMz6RStHVp8BjgW2AeYCY/O7mZnZoOj2yErScOCwiPCTgs3MbMh0e2QV\nESuAiYPUFjMzs061cs3qfyT9EPgZ8Mr/rCLizgFrlZmZWU0rweo9+f6NWloAe/R/c8zMzFbX0zWr\nYcBZEXHpILXHzMxsNT1ds1oJHNeXgiVtJ+lGSfdJmiXp+EzfTNL1kmbn+8hMl6QzJLVJmi7pbbWy\nJuXwsyVNqqW/XdKMzHOGJPW1DjMza65Wuq5fL+lLGXw2q14t5FsOfDEi3gTsBhwraWfgBOCGiBgD\n3JDfAfYFxuTraOAsKIGH8vDHdwHvBE6sgk8Oc3Qt34RM71UdZmbWbK0Eq7+j/K/qJuCOfE3rKVNE\nzKs6YUTEC5Q/Em9D6V14fg52PvCx/DwRuCCKPwIjJG1NudXT9RHxbEQsAK4HJuRvm0TELRERwAUd\nyupNHWZm1mCt3HV9h1dbiaTRwFuBW4GtImJelj1P0pY52DaUBz1W5mZad+lzO0mnD3XMexWjZ2Zm\nA6zHYCXp8M7SI+KCViqQtDHwC+DzEfF8XlbqdNDOqulDerfNaSWPpKMppwnZfvvteyjSzMwGWiun\nAd9Re70fOAnYv5XCJa1LCVQXRcTlmfxkdeot35/K9LnAdrXs2wKP95C+bSfpfaljFRFxdkSMi4hx\no0aNamVUzcxsAPUYrCLis7XXpyin83p8YnD2zDsHuC8ivl/7aQpQ9eibBFxVSz88e+ztBjyXp/Ku\nA/aWNDI7VuwNXJe/vSBpt6zr8A5l9aYOMzNrsFb+FNzREkpvup68FzgMmCHp7kz7KnAKcKmko4BH\ngYPzt2uB/YC2rONIgIh4VtI3gdtzuG9ExLP5+RjgPGBD4Ff5ord1mJlZs7VyzeqXtF/XGQbsDPT4\nJ+GI+G86v0YEsGcnwwdd3M09IiYDkztJnwbs0kn6M72tw8zMmquVI6vv1T4vBx6JiLldDWxmZtbf\nWg
lWjwLzImIpgKQNJY2OiIcHtGVmZmapld6APwdW1r6vyDQzM7NB0UqwWiciXqq+5OceewOamZn1\nl1aC1XxJr/yvStJE4OmBa5KZmdmqWrlm9RngonwAI5Q/1nZ6VwszM7OB0Mq9AR8EdsvbJilvSmtm\nZjZoejwNKOlfJY2IiEUR8ULeSeLkwWicmZkZtHbNat+IWFh9ycd07DdwTTIzM1tVK8FquKT1qy+S\nNgTW72Z4MzOzftVKB4ufAjdIOpdy26W/o/3BhmZmZgOulQ4W35E0Hdgrk74ZEdcNbLPMzMzatXrX\n9buAdSlHVncNXHPMzMxW10pvwI8DtwEHAR8HbpV00EA3zMzMrNLKkdU/Ae+IiKcAJI0CfgtcNpAN\nMzMzq7TSG3BYFajSMy3mMzMz6xetHFn9WtJ1wMX5/ROUJ+6amZkNilZ6A/5fSQcC76M8+ffsiLhi\nwFtmZmaWWuoNGBGXA5cPcFvMzMw65WtPZmbWeA5WZmbWeF0GK0k35Pupg9ccMzOz1XV3zWprSR8E\n9pd0CaVzxSsi4s4BbZmZmVnq7jTg14ATgG2B7wOn1V7f66lgSZMlPSVpZi3tJEl/lnR3vvar/fYV\nSW2SHpC0Ty19Qqa1STqhlr6DpFslzZb0M0nrZfr6+b0tfx/dUx1mZtZsXQariLgsIvYFvhMRu3d4\n7dFC2ecBEzpJPz0ixubrWgBJOwOHAG/OPD+SNFzScOBMYF9gZ+DQHBbg1CxrDLAAOCrTjwIWRMQb\ngdNzuC7raGE8zMxsiPXYwSIivilpf0nfy9dHWik4Im4Cnm2xHROBSyJiWUTMAdqAd+arLSIeioiX\ngEuAiZIE7EH7LZ/OBz5WK6t6hMllwJ45fFd1mJlZw7VyI9tvA8cD9+br+Ezrq+MkTc/ThCMzbRvg\nsdowczOtq/TNgYURsbxD+ipl5e/P5fBdlWVmZg3XStf1DwMfiojJETGZcgrtw32s7yxgR2AsMI9y\n/Qs6dN5I0Yf0vpS1GklHS5omadr8+fM7G8TMzAZRq/+zGlH7vGlfK4uIJyNiRUSsBH5C+2m4ucB2\ntUG3BR7vJv1pYISkdTqkr1JW/r4p5XRkV2V11s6zI2JcRIwbNWpUX0bVzMz6USvB6tvAXZLOk3Q+\ncAfwr32pTNLWta8HAFVPwSnAIdmTbwdgDOUZWrcDY7Ln33qUDhJTIiKAGynP2AKYBFxVK2tSfj4I\n+F0O31UdZmbWcK3cyPZiSVOBd1BOpX05Ip7oKZ+ki4HxwBaS5gInAuMljaWcfnsY+HTWMUvSpZRr\nYsuBYyNiRZZzHHAdMByYHBGzsoovA5dIOpny9OJzMv0c4EJJbZQjqkN6qsPMzJpN5aDDujJu3LiY\nNm3aUDcDgPvPnNinfDsde1XPA5mZ9SNJd0TEuP4qz/cGNDOzxnOwMjOzxus2WEkaVr9dkpmZ2VDo\nNlhlF/N7JG0/SO0xMzNbTStPCt4amCXpNmBxlRgR+w9Yq8zMzGpaCVZfH/BWmJmZdaOV/1n9XtIb\ngDER8VtJr6P858nMzGxQtHIj209R7l7+H5m0DXDlQDbKzMysrpXTgMdS7uF3K0BEzJa05YC26jXq\nibNO7nWe/3XMPw9AS8zM1iyt/M9qWT5LCnjl5rC+7YWZmQ2aVoLV7yV9FdhQ0oeAnwO/HNhmmZmZ\ntWslWJ0AzAdmUG48ey3gc1NmZjZoWukNuDIfDXIr5fTfA+G735qZ2SDqMVhJ+jDwY+BByiNCdpD0\n6Yj41UA3zszMDFrrDXgasHtEtAFI2hG4BnCwMjOzQdHKNaunqkCVHgKeGqD2mJmZrabLIytJB+bH\nWZKuBS6lXLM6mPK4eTMzs0HR3WnAj9Y+Pwl8MD/PB0YOWIvMzMw66DJYRcSRg9kQMzOzrrTSG3AH\n4LPA6PrwfkSImZkNllZ6A14JnEO5a8XKgW2OmZnZ6loJVksj4owB
b4mZmVkXWglWP5B0IvAbYFmV\nGBF3DlirzMzMalr5n9WuwKeAUyh/ED4N+F5PmSRNlvSUpJm1tM0kXS9pdr6PzHRJOkNSm6Tpkt5W\nyzMph58taVIt/e2SZmSeMySpr3WYmVmztRKsDgD+MiI+GBG752uPFvKdB0zokHYCcENEjAFuyO8A\n+wJj8nU0cBaUwAOcCLyL8kytE6vgk8McXcs3oS91mJlZ87USrO4BRvS24Ii4CXi2Q/JE4Pz8fD7w\nsVr6BVH8ERghaWtgH+D6iHg2IhYA1wMT8rdNIuKWvKnuBR3K6k0dZmbWcK1cs9oKuF/S7ax6zaov\nXde3ioh5mX9e7YnD2wCP1Yabm2ndpc/tJL0vdczrw3iYmdkgaiVYnTjgrSh3c+8o+pDelzpWH1A6\nmnKqkO23376HYs3MbKC18jyr3/djfU9K2jqPeLam/Ya4c4HtasNtCzye6eM7pE/N9G07Gb4vdawm\nIs4GzgYYN26cn91lZjbEerxmJekFSc/na6mkFZKe72N9U4CqR98k4Kpa+uHZY2834Lk8lXcdsLek\nkdmxYm/guvztBUm7ZS/AwzuU1Zs6zMys4Vo5snp9/bukj1F65nVL0sWUo6ItJM2lnE48BbhU0lHA\no5Q7uANcC+wHtAFLgCOz7mclfZP2u7x/IyKqThvHUHocbkh5tlb1fK1e1WFmZs3XyjWrVUTElZJO\naGG4Q7v4ac9Ohg3g2C7KmQxM7iR9GrBLJ+nP9LYOMzNrtlZuZHtg7eswYBw9d2YwMzPrN60cWdWf\na7UceJjynyUzM7NB0co1K1/bMTOzIdXdY+2/1k2+iIhvDkB7zMzMVtPdkdXiTtI2Ao4CNgccrMzM\nbFB091j706rPkl4PHE/p7n0J5c7rZmZmg6Lba1Z51/MvAJ+k3BT2bXlDWTMzs0HT3TWr7wIHUm47\ntGtELBq0VpmZmdV0d7ulLwJ/Afwz8HjtlksvvIrbLZmZmfVad9esWnnWlZmZ2YBzQDIzs8ZzsDIz\ns8ZzsDIzs8ZzsDIzs8ZzsDIzs8ZzsDIzs8ZzsDIzs8ZzsDIzs8ZzsDIzs8ZzsDIzs8ZzsDIzs8Zz\nsDIzs8ZzsDIzs8YbkmAl6WFJMyTdLWlapm0m6XpJs/N9ZKZL0hmS2iRNl/S2WjmTcvjZkibV0t+e\n5bdlXnVXh5mZNdtQHlntHhFjI2Jcfj8BuCEixgA35HeAfYEx+ToaOAteeYrxicC7gHcCJ9aCz1k5\nbJVvQg91mJlZgzXpNOBE4Pz8fD7wsVr6BVH8ERghaWtgH+D6iHg2IhYA1wMT8rdNIuKWiAjggg5l\ndVaHmZk12FAFqwB+I+kOSUdn2lYRMQ8g37fM9G2Ax2p552Zad+lzO0nvrg4zM2uwLp8UPMDeGxGP\nS9oSuF7S/d0Mq07Sog/pLcsAejTA9ttv35usZmY2AIYkWEXE4/n+lKQrKNecnpS0dUTMy1N5T+Xg\nc4Htatm3BR7P9PEd0qdm+radDE83dXRs39nA2QDjxo3rVaBbG1x27oSeB+rgoCN/PQAtMbO1xaCf\nBpS0kaTXV5+BvYGZwBSg6tE3CbgqP08BDs9egbsBz+UpvOuAvSWNzI4VewPX5W8vSNotewEe3qGs\nzuowM7MGG4ojq62AK7I3+TrAf0XEryXdDlwq6SjgUeDgHP5aYD+gDVgCHAkQEc9K+iZwew73jYh4\nNj8fA5wHbAj8Kl8Ap3RRh5mZNdigB6uIeAh4SyfpzwB7dpIewLFdlDUZmNxJ+jRgl1brMDOzZhuq\nDhZmQ27fq/6m13l+NfEXA9ASM+tJk/5nZWZm1ikHKzMzazwHKzMzazwHKzMzazwHKzMzazwHKzMz\nazwHKzMzazz/z8psiH348rN6neeaA48ZgJaYNZePrMzMrPEcrMzMrPF8GtDWSCdduk/f8n38un5u\nifWXT1ze1qd8Pzvwjf3cEmsi
H1mZmVnj+chqLTP1Jx/udZ7xn7pmAFpiZtY6ByuzNdxHLruoT/mu\nPuiT/dwSs4Hj04BmZtZ4DlZmZtZ4DlZmZtZ4DlZmZtZ4DlZmZtZ4DlZmZtZ4DlZmZtZ4/p9Vi+af\n9dNe5xl1zN8OQEteG/7jwt7fLunThzXvVkn7XXFyr/Nce8A/D0BLzF7b1spgJWkC8ANgOPCfEXHK\nEDfJbEjtf9kve51nykEfHYCWmHVurTsNKGk4cCawL7AzcKiknYe2VWZm1p21LlgB7wTaIuKhiHgJ\nuASYOMRtMjOzbqyNpwG3AR6rfZ8LvGuI2mL2mnHAL/6713mu+Jv39WsbzrziyT7lO/aArV75/Kuf\nPd3r/Pt+YotVvt/1n0/1uoy3/v2Wvc6zNlFEDHUbBpWkg4F9IuLv8/thwDsj4rO1YY4Gjs6v/xt4\noIditwB6v4T3X/7XUhlNaEN/lNGENjSljCa0oSllNKENg1XGGyJi1Kus4xVr45HVXGC72vdtgcfr\nA0TE2cDZrRYoaVpEjOtrg15t/tdSGU1oQ3+U0YQ2NKWMJrShKWU0oQ1NKqM31sZrVrcDYyTtIGk9\n4BBgyhC3yczMurHWHVlFxHJJxwHXUbquT46IWUPcLDMz68ZaF6wAIuJa4Np+LLLlU4YDlP+1VEYT\n2tAfZTShDU0powltaEoZTWhDk8po2VrXwcLMzNY8a+M1KzMzW8Os8cFK0raSrpI0W9KDkn6QHSd6\nyvewpBWS7pa0ON9PkLSoD204T9JB3fw+XtLVWecWtfSq/lmS7pH0BUnD8rdxku6V9ENJP8y0z0g6\nvId63pOfp0oal5+vlTSik+Gr+udJmt6h/hWS5kuaKemXkkZ0aO9Dkh6TdJ+k+yVdKOmMalpIOknS\nl7Ksb0jaqxfT86sdvh8gKSTtVEubJGlJ1v1ozv/tuyjviByXu/N1QaaHpAtrw62Tw11dS6svX3+W\ndGm1fEkaK+n7tflztaSL69Nf0j/l9JqedQ/If/okjZa0okPaK/OgQ/rncr7dkm0+obvhe6h3tXnT\nyTAjJE2pTYeZkn7Xw/D/UPve62nY2Xrcyfp2cW3ejcvld6qkl7qrT9KRtWXpJUkLJM2RdIqk92fe\nuyVt2FM7WxiPvSTdKWmlpP1z2i3Iel/MdXt3STN7KKca95mSfi7pda+iTavMn/7QcZ3vVESssS9A\nwG3Akfl9OHAO8N0W8j4MLOokfbW0Fso6Dziom9/HA1dnnVsAwzvWBWwJ/Bb4eodyfwj8sMV2nAR8\nKT9PBcb1MPyi2rTYqV5/h7ZdAPxTbfi/zjx/AL5Oufb5D/VpUW9LNW9anJ/DOs4D4OfAzcBJ+X0X\nYA4wO78fkdP3A/l9nQ75j+hsGgKLgLuADfP7vsDdwNVdLF9fB26tlq8s97dV2Z1M/yOBW4D1M20L\n4C+q9nXSzk6nUT29ljeAC2vpO2ba1bXla5V5UBv2fmAH4D+B6Zn2GeAK4EutLDu1si6tz5vO2g4c\nACzpOB06G8ec/+cCS4EZwL3AnZ3l7Tj9elqPWXWZ3jrLvhuYWUu/E1jRXVs7lPkwcDG5/gM/ri0v\nXbavm/KG1/MBe2WbHgXuodzQ4DPATGA0MBY4uD4OPU0P4CLgC71tW619o6v6+jKOrc6v1Ybpj4qG\n6gXsCdzUIW0T4BngdZSNyeXAr4HZwHc6LGTVxrd6H58L6gvAQzlTLwa+TNloBbAsf7+Zcm/BK4Bn\ns7xrgF8Cy4GVlI3hP2S5S4AFuQI+AMzK8mZmnU8AbcDLwKgc7glKsLocmAe8lL+fC3yO8oe8lVnu\nWcD8/H0x8CJwIzAS+HPmvyiHeSlfy/N75PALctzuyrTnKRuNR/J7NdzLmfflTFuUbV+W47K8w/Bz\ns77ZtTxB2Wh+sZbnZeAX+X1FTpubsx335+vtwJNZ5guUjc4ROS0ezOl8E2Wez8hp8ZWcjrdTgs
0f\nsl3La+PzQuZdTvnfXVtO2xWUYF0Fg0X5fmfmrdr6nXy/nbL8RZZVjcfttC8XyynLzJIc7uWcPk/k\neCzNeh4D/gtYCPwxp98fKMtkNQ3nAn8DfLvDPPsjZf7Py2lwA2UZXpj1zaAE0sdy2nw4P38t23E1\nZXlYBtxH2ZkR8N0cnxmU5XJx1rEcOBD4U7bhmZzW83O+VdNjac7TWZR15pJMX5bjPDunxcqcvs9n\ne16X6+hlOe0ez/Y9BTxXK+PLwKdyui+lfX3eM8f7lsz7MCVQVcve0pwff6Z9faza8QBwKuXm19X8\nX0gJHkvz84tZ/0s5zEs5jtXy8FIOe0/+/kKmX0v78vcYJYBenuUtpizT9+T8WJHteaH6PafJYVnG\nizktrs1yFmXb/yfbfDGwQU6/5ynrUTX+j1CW+QU5feYD/whcmdN4GWV5Wpb5q+X2kSx/QdY5Hfgs\nMDnH/S5gYrbzCDrZHgOnZDvuBi7qanu/pp8GfDNwRz0hIp6n7IW8MZPGAp8AdgU+Ian+h+ANJd1d\ne9+dsmd3NGVGvoOyB/p+ykYf4HzKyrcRcBXwe8r/tP4fZSHaC3gP5c4XAv6FEkChzOCPZrufoMz4\nB7LO5ZQFcjkl8KxHWRgA3krZS/9X4ERgXeBkygy/GdifcuRzP2WF24+yoWjL4V/OOt6Vbf5Rlrs8\ny1iS4/xZYGNKgF2ZeZYAm1IWxpWUlec+ypENmfZvlKBRBXJRNmCzKCvHrbX81fgtyOFOzff3Zdt2\noqx0L1IW3v/IafTTzDM5582n8vu3sh2vpxwRbA68LfN9NKfzYZRl4M3AWyhHFDMoe4kHU1a4jbOc\nfbOMNsrfG9ooR5Kfzt8vznYtoGx87qUcXeydv+9IWUlX0h7stqIE2WeB03J878pxgrKy/4ay0m4M\nbEjZCdqSMl+vpGy4Xsjh9sx8P87pey/w3kybQ9lR25my/P47JbCOp2zI/0/Og91z/gD8JWWZqJYF\n8vc5lHVps/z9EuBQysZsdk6rdXM6VBvDyPHcLMt9kfLHeyiB4BHKuvgXmbYfcFxErE8JIC9SAvQj\nlMBSDfewpCnAhyjr4kzKclHtQD5CWc6PpWwQh1GCyC2Z/4p83wUYQdlYL8nhqjLWz/qG5Xgto+x8\nXgzsQ1nmqpten0vZ9qxHmcdHUM4qVMv+k5SN8rmU5eHZLH8MZVn/H8q8fi6ny7qUnYlxwEeAv8vf\nX6KsWw9TloEFEfH6nDeoXFb4l2zrxpTtwR6U5fEFyvL4MGVZ3JWynr9EWV+OpyxjMyjbsQ2B63O8\nngdOyHqeymm0LWWduYb2I98bKNvEayg7Ku8AvgrMiIh3UJaj70raKKfbatvjiDgBeDEixkbEJ+nC\nmh6sRJmI3aXfEBHPRUS1YXkgLYGiAAAOd0lEQVRDbbgXI2Js7f1GYEVEXEyZEdtTjkbeS9ngQLkR\n7lspC84OlA0vlIVhQ8rKvR9lD3AY5SipWlkXUwLX5ZSZuD5ljxbKXtBHKCvcBykLemUecAbwecoC\ntjtlg7MeZYNzKGWh3wWIiPh95rsa+EB+fiDL2ZPyR+iqzbtQNnjfoQS2oBxVDqMsmCMpK/1G+dt0\nysrzxvx9GHBM/vYU5ZTgY1nm0qznBcrGfcsc589QNhgjs4yXKRveXWm/u8hTOb0OzWF/RglGO2Xd\nP8zydgC+keUsy/f18/drKRufKv8s4JqImJzDQgn62+e0WJJtWZnTeC/grygbmP1o38sWZWP9XI73\nM5RTI1A2BNdRVvZ5lKA0J/OMAr6Q0+z9lA0clOXrQ5QjwPWzvI8AT0dEtVNwQab/rxzHFZRlbzFl\nHlbX6x6LiGqnYkWO5yY5T3albIjqf1kZSQnu/0VZpsi2bkD79mFLyjzfPoeZGBEH5rS6mbLxfZKy\nPN5Cmd/V+vcJ2o/MJ1F2XDbNcVg385wuaR4lEPwlJaC+Ia
fRXZQAdidlR2P9nLYzKRu+zXM8l+Sw\nG9N+q7RNKDsaf6R9eZ5D2UF6K+U+oZHT4znKOrgy0xZnPeMpy8LibOuU/H0v2uf53GzDd7Os27O8\nwylHvHtTgveKnK6X5bwYlu0nh59J2divjIiLoxx2nEH7jtQTwAhJJ9bmzW45X9ajzNsP5G835XRe\nnnWJ9nXlSWBpRFxC2UF9A2WbsyVlXu5FWWc2yzrXoczXjXJePlObBzfk+L2XEtBvzXzH5wHA1Bzn\navnsbnvcrTU9WM2i7Im8QtImlA3eg5m0rPbzClr4b5lKJ4M3UWb0ppl8L2VB3o2y4Z3E6oFSlIC1\nF/BuygbtBcqCRA7/FcoK/iRlBXuC9qBRrVSbUK6FVO6hLNTTKSvrFZSN5u+yjX9PObp6opM2Varr\nFB+nrPzDc1rMyjaeRQmGKyiBYiXlqGIJZaP7YpZTbaSWUYLYSsoR0a21uh6hHH0Mz++b5nAb5Xjf\nRQnqZ2Z7Z+fOwn5ZJ9mG3SnBdXvKHt+R+dsvsr1tEbE75dTVizk+wyhB8juUlWgOZU8P2jfyr8h6\nf0uZd7tn8nqUebRvTpvZ2fYVlOm/HWWFrZe1DqvuJFVHVsrptDzreTSnz79TNhQvU04vbZjTegXw\nt5QN4ObtxbOYckQ4lnLktDwipme73k9Zdur+lMM9TQk01f0tV2SbKi9SgsbmrGpJ7umOyXF6d+a7\nOyJelLQ5ZSP6Aco03ibz3UTZ2alOrb1ICeBB2SDfm2nr5/AzKMv+93N67Z/fV1B2FDenHLmuQ/s8\n2JjypITn8nV1TrtRlCOG/5tlTacsY9W8UU6XxRGxgrI8BOVI8XDKPH846/geZX0aRtlhEeVywdic\nNrtSrsWRdUH7vF9GmX8LKYGrLct4Crg1It4InA7Mj4i9a3mrnYWu3EIJEvtRjpiGZbv+DDyVbbs+\n58HnKUfCD1JO6S3J98coR8krJY2nbKuuoZyOX0g5Onww61hC2Wb8irL8VeNWjfPK/C7Kju6D2YaZ\nlPuvjs3X9hFxX4f80OL2uLKmB6sbgNcpe8ipPKvqNOC8iFjSbc7u/SPlVNd1wFGU6wQ7UKbX6ZRD\n+M9TVtJjMk91ymwr2g/d30j7XlFlE8rKPIf2IDaMcii/I2XD8QhlI1kZQVmQb6KsWNXprfcC/0z7\nqbX/BpZJen/m+zDl8B7Kgqhs91nZxurIbyVlb+gLlA3Mjplny/z+wxyPFbRfn1K2axjl9MVbcvjl\nlL3W0bSvfJtSVhLl9HmEskf1dspKWp1inUTZeFXBcGPKxv1fImI07UdAc4DjgPUlvTnzbkjZAFan\nPDegBMYtKRuKYax6T8gV8MqNjW/N8arWh6BsMKfn9zdTjnSDEqzPy3qey+kGZcMnyumequfj5tnm\nGykr5daU4LFJ1vV3mWdl/jYqy7k+p926rOoKyjK1HjAsA8Y6lA1t9RjlKujMoj14b0KZpzezuqWU\njdEBlGBQjf+CnDaVN1Pm25tyPTsi2307ZVpfkMMtoWzs1qHMp/tpP4I5gnI69TnK9H+ZcqS4ZUR8\nl7Ix/yxlR2AYZTqfQzmNS06XR7OON1HWkYU5/odk3oXZnur06x6U5bQ6ygdA0ijK8rKEEuCqI72q\nB9/PKZ0rFmVZr6Nsa8Zn/i0pZ0yiNt0Ooz1gXZ75N6EsyztS5vFoSR+k7Bg9W1t+K7dT5u3HJYmy\nnFfbgoX5eQfad4SWUpabkTlfbqdsF+6kLL87AtMy/wbZht/luL6d9tPx2+Z0+hBARNxKWQYPpWxn\nN6f9VP6yLKtyHWUHqz4O/5jtR9Jb6dnLkjou76sa6k4S/dDJYjvKeeXZlJXk32nvyXMEtV5glD2w\n8bUOFtVFver9v2i/cPhnyoJ6P+WaTL2DxdJM35lyVLGAsmJeQ1lIqwvpT1KCxecpK8WjwATK3t20\nLK867bCIsje2mHLqZD
ztF7+nU1bk5ZQ9qwuyrCeyLfMpC/FOWXbVwWIqJRA8TDnS+GPWdS/tHQMe\noL1DxDJKsK86WDyReX9Sa+eyHJdqb7kah8W0d7CoLtouy98uoBwVLajlqa5tnJ7DVxfTj6ecpllM\neweE0bV5eColYD+aZc+jzPullNMgD2SZv6YckT6R0/ZRSpC9NMuZmmX/Ouf1csoR2njaOwgsogSX\nyHpW5vd7KNfOTs16qw4nKyjLwK/ze9WBY2mOT3XN7uVs55+z7Jdy/lXzZxFlg/5MtvU82nubTaMs\nD9VG43HKBn181v80ZVmek+Nedd44LfNvkW3YgnKkVi1jP856v535bs7xaMs2fY3Su3AqZc+5Wl6n\nZLmTc7izKDs9QdlphHLdrb6sPEBZZh+m3AWhWl4XZ7tn0d55pVounqO9E8pcSvDYP8tZmdO46oR0\ndaa9TNnAP0dZT4P2sxn3UK4FttG+rFbzKvK96oS0kLL+/CDbGFnvp3K+LaYE8iuzzedSgu0NtJ8m\nrtaP6vT2ohyvT+V0eIKyUwYlQFXT40/Z1qqDUHVGppqW11B2rl+m/Uj2FspO35wc7mNZ30zKuj2a\nsq28NduwPD9PpewQLc1h51CWrwcp6+6ETD+JEgwX5HQdRjmCrfL9PqfBjPx+dQvb41MpBwhddrAY\n8mCzJr3ooXslPXRh76lcyvWVPwHDepn/S8A3h3r6vBZflNPMNw91O7Itw3KjdkpnyyL5F4na93Vz\nY7RTF+W9svFg9W7336acUZhJeYTOKsPk9zGUwPnHHH5RZ+2oDf9j4JQexnECZadjZr4mAxvkb3tQ\nNtjT87V/pp9MCTq/zY3kSbXxGFcre6vcuM6gBPR3U+uGncN8iS664b/KebdKPWvCK4PJnkPdjurl\n2y31gqRFEdHxtF799/MoK+llvS2X0gPvW5T/P/y8F3mvoBzq7xERr/b5NFaTf5Y9BvhkRPT+yYL9\n25adKRuPJ4DLI+J7LeQZB5weEe/vadiBlteB7wQOjojZQ92ewSZpNGXbsMsQN6VHKjcQuA24JyIO\n7mn4weJgZfYa1NBAe0VEfHEo22JrLgcrMxt0knYFLuyQvCwiBuR2VLbmc7AyM7PGW9O7rpuZ2VrA\nwcrMzBrPwcrsVZA0pE/bzj+Cmr3mOVjZWkXSRpKuyecZzZT0CUl7SrpL0gxJkyWtn8O+8vyxfN7R\n1Px8kqSzJf0GuEDScEnfy/zTJX02h3u7pN9LukPSdZK27qZdn1N5ftl0SZdk2saSzq2V+zeZvkjl\nGWG3Au/uqh5JO0r6dabfrHzmlMozx86Q9AeV55L19Cy2qZIuU3l22EW1OxN8TdLtOR3PrqVPlXS6\npJtUnpv1DkmXqzwT7ORa2X8r6TaV5yz9hwOvdWuo/+jll1+D+aI8TuMnte/VraD+Kr9fAHw+Pz8M\nbJGfxwFT8/NJlD+uVs/BOoZyv8LqWVObUf6Q+wdgVKZ9ApjcTbsep/3OKyPy/VTg32rDjMz3AD6e\nn7ush3IHhTH5+V3A7/LzeZTbCQ2j3IWlrZt2jafcAWLbHP4W4H3VeNaGuxD4aH6eCpyan4/Pcdua\ncnuj6p6Hb6LceWbdHO5HwOFDvXz41dzXkJ7CMBsCM4DvSTqV8t+f54E5EfGn/P18ymMm/q2HcqZE\nRHVz372AH0fEcoCIeFbSLpT7C16fBxzDKbcD6sp04CJJV1Ju21OVW90hn4hYkB9XUIIjlEfRrFaP\npI0pj6r5eaZD+81jAa6Mcnf2eyVtRfdui4i5ACp30h5NuQ/l7pL+H+XWR5tRbpP0y8wzJd9nALMi\nYl7mf4hyi7T3Ue5Nd3u2b0PKrYjMOuVgZWuViPiTpLdT7ir9bcrzobqynPZT5Rt0+G1x7XNnj6oR\nZSP97hab9mHKHcz3B/4lb3Da1SNwlka5a3iX9ag8fWBhlLtgd6Z+92t1MUxnw64A1pG0
AeVoaFxE\nPCbpJFadRvW7c9fzr6T9LujnR8RXeqjbDPA1K1vLSPoLyiMefkp5DMR7KHfCrh7WeRjtd6p/mLL3\nD+X0YVd+A3ym6mwhaTPKzVpHSXp3pq2r1e+wXbVpGLBdRNxIeYjnCMod539DualpNdzITrJ3Wk+U\nh5DOUd45XcVbuhmH3qoC09N5FNflda8u3AAcpHL3ciRtJukN/dg+e41xsLK1za7AbXk6658oj1g5\nknK6bAZlz//HOezXgR9IupkOz8Hq4D8pd3WfLuke4P9ExEuUDfipmXY3JTB2Zjjw06z/Lsr9/BZS\nbtA6Mjsw3EP787Ze0UM9nwSOyvRZlGdA9Yts308op/mupNxgtjf576VM+99Imk55/EeXHVDMfAcL\nMzNrPB9ZmZlZ47mDhdkgknQm5UmudT+IiHOHoj0V+cay1nA+DWhmZo3n04BmZtZ4DlZmZtZ4DlZm\nZtZ4DlZmZtZ4DlZmZtZ4DlZmZtZ4/x9GdBk468G0MwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x25bdb31ab38>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(df_train_merged['source_screen_name']);\n",
    "plt.xlabel('source_screen_name');\n",
    "plt.ylabel('Number of occurrences');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcEAAAEKCAYAAABqlO6fAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xm4HFWd//H3JyEghCUsAZEtgMyo\ngCBkWBQVRSGgsk0YcFAColF/oDAjzIAyrCo6iiKgKGvYEVmUERBCZFPWAEnYRCIiRCJEwhKCCZB8\nf3+c09xK031v3ZtUX+6tz+t5+umq01XnnFq/tfUpRQRmZmZ1NKS/K2BmZtZfHATNzKy2HATNzKy2\nHATNzKy2HATNzKy2HATNzKy2HATNzKy2HATNzKy2HATNzKy2lurvCrzVrbbaajFq1Kj+roaZ2YBy\n7733/j0iRvZ3PXriINiDUaNGMXny5P6uhpnZgCLpL/1dhzJ8OdTMzGrLQdDMzGrLQdDMzGrLQdDM\nzGrLQdDMzGrLQdDMzGrLQdDMzGrLQdDMzGrLf5Z/C3vylLGV5LvuVy+vJF8zs4HGZ4JmZlZbDoJm\nZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZb\nDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZblQVB\nSetIuknSI5IeknRITl9F0kRJj+XvlXO6JJ0iabqkaZK2KOQ1Lg//mKRxhfQtJT2QxzlFkvpahpmZ\n1U+VZ4KvA1+LiHcD2wAHSXoPcAQwKSI2AiblfoCdgY3yZzxwOqSABhwDbA1sBRzTCGp5mPGF8cbk\n9F6VYWZm9VRZEIyImRFxX+6eAzwCrAXsBpyXBzsP2D137wacH8mdwAhJawI7ARMjYnZEPA9MBMbk\n31aMiDsiIoDzm/LqTRlmZlZDHbknKGkU8D7gLmCNiJgJKVACq+fB1gKeKow2I6d1lz6jRTp9KKO5\nvuMlTZY0edasWb2ZVDMzG0AqD4KSlgeuAA6NiJe6G7RFWvQhvdvqlBknIs6IiNERMXrkyJE9ZGlm\nZgNVpUFQ0jBSALwoIq7Myc80LkHm72dz+gxgncLoawNP95C+dov0vpRhZmY1tFRVGecnNc8GHomI\nHxR+uhoYB3wnf/+qkH6wpEtJD8G8GBEzJV0PfLvwMMyOwJERMVvSHEnbkC6z7gec2pcyyk7TrNMv\nLD8DemHklz9TSb5mZta9yoIg8AHgs8ADkqbktK+TAtNlkg4EngT2yr9dC+wCTAdeAQ4AyMHuBOCe\nPNzxETE7d38ZmAAsC1yXP/S2DDMzq6fKgmBE/I7W9+AAdmgxfAAHtcnrHOCcFumTgU1apD/X2zLM\nzKx+3GKMmZnVloOgmZnVloOgmZnVloOgmZnVloOgmZnVloOgmZnVloOgmZnVloOgmZnVloOgmZnV\nloOgmZnVloOgmZnVloOgmZnVVo9BUNJwSUNy9z9J2jW/J9DMzGxAK3MmeCvwNklrAZNIrx+aUGWl\nzMzMOqFMEFREvALsCZwaEXsA76m2WmZmZtUrFQQlbQvsC1yT06p8Ga+ZmVlHlAmChwJHAldFxEOS\nNgBuqrZaZmZm1evxjC4ibgFukTQ89z8OfLXqipmZmVWtzNOh20p6GHgk928m6SeV18zMzKxiZS6H\nngzsBDwHEBFTgQ9VWSkzM7NOKPVn+Yh4qilpQQV1MTMz66gyT3k+Jen9QEhamnQ/8JFqq2VmZla9\nMmeCXwIOAtYCZgCb534zM7MBrczToX8n/UfQzMxsUCnzdOh5kkYU+leWdE611TIzM6temcuh742I\nFxo9EfE88L7qqmRmZtYZZYLgEEkrN3
okrYKbTTMzs0GgTDA7Cbhd0uW5fy/gW9VVyczMrDPKPBhz\nvqR7gY8AAvaMiIcrr5mZmVnFyl7W/APwfGN4SetGxJOV1crMzKwDegyCkr4CHAM8Q2opRkAA7622\namZmZtUqcyZ4CPDPEfFc1ZUxMzPrpDJPhz4FvFh1RczMzDqtzJng48DNkq4B5jcSI+IHldXKzMys\nA8oEwSfzZ+n8MTMzGxTK/EXiOABJwyNibvVVMjMz6wy/Wd7MzGqrsjfLSzpH0rOSHiykHSvpr5Km\n5M8uhd+OlDRd0qOSdiqkj8lp0yUdUUhfX9Jdkh6T9PP8rkMkLZP7p+ffR/VUhpmZ1VOVb5afAIxp\nkf7DiNg8f64FkPQeYB9g4zzOTyQNlTQU+DGwM/Ae4NN5WIDv5rw2Iv2R/8CcfiDwfES8E/hhHq5t\nGSWmw8zMBqlSf5Eovlle0mGUeLN8RNwKzC5Zj92ASyNifkT8GZgObJU/0yPi8Yh4FbgU2E2SgI8C\njfZMzwN2L+R1Xu6+HNghD9+uDDMzq6n+eLP8wZKm5culjbdTrEX6P2LDjJzWLn1V4IWIeL0pfZG8\n8u8v5uHb5fUmksZLmixp8qxZs/o2lWZm9pbXbRDMlws/GxH7RsQaEbF6RHxmMVqPOR3YkBRIZ5Le\nUAGpKbZm0Yf0vuT15sSIMyJidESMHjlyZKtBzMxsEOg2CEbEAtJlxCUiIp6JiAURsRA4k67LkTOA\ndQqDrg083U3634ERkpZqSl8kr/z7SqTLsu3yMjOzmipzOfT3kk6T9EFJWzQ+fSlM0pqF3j2AxpOj\nVwP75Cc71wc2Au4G7gE2yk+CLk16sOXqiAjgJmBsHn8c8KtCXuNy91jgt3n4dmWYmVlNlWkx5v35\n+/hCWpAeTGlL0iXA9sBqkmaQ3kSxvaTN8/hPAF8EiIiHJF0GPAy8DhyUz0KRdDBwPTAUOCciHspF\n/DdwqaRvAvcDZ+f0s4ELJE0nnQHu01MZZmZWT90GQUlDgNMj4rLeZhwRn26RfHaLtMbw36LFG+vz\n3yiubZH+OC2e7oyIecBevSnDzMzqqad7gguBgztUFzMzs44qc09woqTDJK0jaZXGp/KamZmZVazM\nPcHP5e/ifwMD2GDJV8fMzKxzyrxFYv1OVMTMzKzTegyCkvZrlR4R5y/56piZmXVOmcuh/1Lofhuw\nA3Af4CBoZmYDWpnLoV8p9ktaCbigshqZmZl1SKlXKTV5hdTaipmZ2YBW5p7g/9HV0PQQ0nv9ev3n\neTMzs7eaMvcEv1/ofh34S0TMqKg+ZmZmHVMmCD4JzMzNkSFpWUmjIuKJSmtmZmZWsTL3BH8BLCz0\nL8hpZmZmA1qZILhURLza6MndS1dXJTMzs84oEwRnSdq10SNpN9JLbc3MzAa0MvcEvwRcJOm03D8D\naNmKjJmZ2UBS5s/yfwK2kbQ8oIiYU321zMzMqtfj5VBJ35Y0IiJejog5klbOb3M3MzMb0MrcE9w5\nIl5o9ETE88Au1VXJzMysM8oEwaGSlmn0SFoWWKab4c3MzAaEMg/GXAhMknQuqfm0zwHnVVorMzOz\nDijzYMz/SpoGfCwnnRAR11dbLTMzs+qVORMEuB8YRjoTvL+66piZmXVOmadD/w24GxgL/Btwl6Sx\nVVfMzMysamXOBL8B/EtEPAsgaSRwI3B5lRUzMzOrWpmnQ4c0AmD2XMnxzMzM3tLKnAn+RtL1wCW5\nf2/g2uqqZGZm1hllng49XNKewHaAgDMi4qrKa2ZmZlaxUk+HRsSVwJUV18XMzKyjfG/PzMxqy0HQ\nzMxqq20QlDQpf3+3c9UxMzPrnO7uCa4p6cPArpIuJT0U84aIuK/SmpmZmVWsuyB4NHAEsDbwg6bf\nAvhoVZUyMzPrhLZBMCIuBy6X9D8RcUIH62Q18bMLdqok3y9+1u27m1k5Zf4neIKkXYEP5aSbI+LX\n1V
bLzMysemUa0D4ROAR4OH8OyWlmZmYDWpk/y38C2DwiFgJIOo/0OqUjq6yYmZlZ1cr+T3BEoXul\nMiNIOkfSs5IeLKStImmipMfy98o5XZJOkTRd0jRJWxTGGZeHf0zSuEL6lpIeyOOcIkl9LcPMzOqp\nTBA8Ebhf0oR8Fngv8O0S400AxjSlHQFMioiNgEm5H2BnYKP8GQ+cDimgAccAWwNbAcc0gloeZnxh\nvDF9KcPMzOqrxyAYEZcA25DaDr0S2DYiLi0x3q3A7Kbk3YDzcvd5wO6F9PMjuRMYIWlNYCdgYkTM\njojngYnAmPzbihFxR0QEcH5TXr0pw8zMaqpsA9ozgauXQHlr5LyIiJmSVs/pawFPFYabkdO6S5/R\nIr0vZcxsrqSk8aSzRdZdd91eTqKZmQ0Ub5W2Q9UiLfqQ3pcy3pwYcUZEjI6I0SNHjuwhWzMzG6g6\nHQSfaVyCzN+NN9bPANYpDLc28HQP6Wu3SO9LGWZmVlPdBkFJQ4pPdy4BVwONJzzHAb8qpO+Xn+Dc\nBngxX9K8HthR0sr5gZgdgevzb3MkbZOfCt2vKa/elGFmZjXV7T3BiFgoaaqkdSPiyd5kLOkSYHtg\nNUkzSE95fge4TNKBwJPAXnnwa4FdgOnAK8ABufzZkk4A7snDHR8RjYdtvkx6AnVZ4Lr8obdlmJlZ\nfZV5MGZN4CFJdwNzG4kRsWt3I0XEp9v8tEOLYQM4qE0+5wDntEifDGzSIv253pZhZmb1VCYIHld5\nLczMzPpBmQa0b5G0HrBRRNwoaTlgaPVVMzMzq1aZBrS/AFwO/CwnrQX8sspKmZmZdUKZv0gcBHwA\neAkgIh4DVu92DDMzswGgTBCcHxGvNnokLUXPf0w3MzN7yysTBG+R9HVgWUkfB34B/F+11TIzM6te\nmSB4BDALeAD4Iun/dkdVWSkzM7NOKPN06ML8CqW7SJdBH83/uTMzMxvQegyCkj4B/BT4E6kR6vUl\nfTEirut+TDMzs7e2Mn+WPwn4SERMB5C0IXANXc2UmZmZDUhl7gk+2wiA2eN0vZnBzMxswGp7Jihp\nz9z5kKRrgctI9wT3oqtBazMzswGru8uhnyp0PwN8OHfPAlaurEZmZmYd0jYIRoRfNWRmZoNamadD\n1we+AowqDt/Tq5TMzMze6so8HfpL4GxSKzELq62OmZlZ55QJgvMi4pTKa2JmZtZhZYLgjyQdA9wA\nzG8kRsR9ldXKzMysA8oEwU2BzwIfpetyaOR+MzOzAatMENwD2KD4OiUzM7PBoEyLMVOBEVVXxMzM\nrNPKnAmuAfxB0j0sek/Qf5EwM7MBrUwQPKbyWpiZmfWDMu8TvKUTFTEzM+u0Mi3GzCE9DQqwNDAM\nmBsRK1ZZMTMzs6qVORNcodgvaXdgq8pqZGZm1iFlng5dRET8Ev9H0MzMBoEyl0P3LPQOAUbTdXnU\nzMxswCrzdGjxvYKvA08Au1VSGzMzsw4qc0/Q7xU0M7NBqW0QlHR0N+NFRJxQQX3MzMw6prszwbkt\n0oYDBwKrAg6CZmY2oLUNghFxUqNb0grAIcABwKXASe3GMzMzGyi6vScoaRXgP4F9gfOALSLi+U5U\nzMzMrGrd3RP8HrAncAawaUS83LFamZmZdUB3f5b/GvAO4CjgaUkv5c8cSS91pnpmZmbVaRsEI2JI\nRCwbEStExIqFzwqL226opCckPSBpiqTJOW0VSRMlPZa/V87pknSKpOmSpknaopDPuDz8Y5LGFdK3\nzPlPz+OquzLMzKyeet1s2hL0kYjYPCJG5/4jgEkRsREwKfcD7AxslD/jgdPhjfuVxwBbk9oyPaYQ\n1E7PwzbGG9NDGWZmVkP9GQSb7UZ6+Ib8vXsh/fxI7gRGSFoT2AmYGBGz88M6E4Ex+bcVI+KOiAjg\n/Ka8WpVhZmY11F9BMIAbJN0raXxOWyMiZgLk79Vz+lrAU4VxZ+S0
7tJntEjvrgwzM6uhMm2HVuED\nEfG0pNWBiZL+0M2wapEWfUgvLQfm8QDrrrtub0Y1M7MBpF+CYEQ8nb+flXQV6Z7eM5LWjIiZ+ZLm\ns3nwGcA6hdHXBp7O6ds3pd+c09duMTzdlNFcvzNIfw1h9OjRfmPGIHHAVWN6HqgPzt3jN5Xka2bV\n63gQlDQcGBIRc3L3jsDxwNXAOOA7+ftXeZSrgYMlXUp6CObFHMSuB75deBhmR+DIiJid/8axDXAX\nsB9waiGvVmWYmfWbJ07+WyX5jjr07ZXkO5j0x5ngGsBV+V8LSwEXR8RvJN0DXCbpQOBJYK88/LXA\nLsB04BVS023kYHcCcE8e7viImJ27vwxMAJYFrssfSMGvVRlmZlZDHQ+CEfE4sFmL9OeAHVqkB3BQ\nm7zOAc5pkT4Z2KRsGWZmVk9vpb9ImJmZdZSDoJmZ1VZ//UXCbFD7xFXfqyzva/Y4vLK8zerGZ4Jm\nZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZb\nDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbDoJmZlZbfp+g2SDwycsvqizvX4/dt7K8zfqbzwTNzKy2\nHATNzKy2fDnU3nD92btUku9OB15bSb5mZovLZ4JmZlZbPhM0s17b/fJJleX9y7E7VJa3WTOfCZqZ\nWW05CJqZWW05CJqZWW05CJqZWW05CJqZWW05CJqZWW05CJqZWW05CJqZWW05CJqZWW05CJqZWW05\nCJqZWW05CJqZWW05CJqZWW3VMghKGiPpUUnTJR3R3/UxM7P+UbsgKGko8GNgZ+A9wKclvad/a2Vm\nZv2hju8T3AqYHhGPA0i6FNgNeLhfa2Vm1iHP/OiOSvJd45BtK8m3SoqI/q5DR0kaC4yJiM/n/s8C\nW0fEwYVhxgPjc+8/A4/2oajVgL8vZnVdnssbDOUN5mlzee2tFxEjl3RllrQ6ngmqRdoiRwIRcQZw\nxmIVIk2OiNGLk4fLc3mDobzBPG0ub+Cr3T1BYAawTqF/beDpfqqLmZn1ozoGwXuAjSStL2lpYB/g\n6n6uk5mZ9YPaXQ6NiNclHQxcDwwFzomIhyooarEup7o8lzeIyhvM0+byBrjaPRhjZmbWUMfLoWZm\nZoCDoJmZ1Vjtg6CkCfm/g0g6a0m0HiPp5W5+O1bSYb3M71hJh7Wqa6MsSe+QdHnu3l/SjZLe0dcy\n29Sj7XQVhhkl6cGS+TWma39Jp5Uc52ZJowv9b0ybpOMlfazw2whJ/6/Q/6yk9+fu23so5+uStpf0\n69x/crtxJF2by1qkvLIkXSJpmqT/6O24fShr/8Z6kftfy+vRG+tPq+H7Om2LWdeW87W4XJqGP1TS\ncs3jd5P/xZJ+2kMddm00rdjHbbflfJP0hKTV2qU3tqNW09Bqm+9N2fm35vWg231fb7bRnizO/qg3\n++vmaWyn9kGwKCI+HxEDouWY5rpGxNMRMbYwyLuBtiuAklLLX6mpube8iDg6Im4sJI0AijuB5YB/\nliRgux6y+3pT/xTgvjbl7hIRL7Qor0eS3g68PyLeGxE/LDlOnx5oy8txfxZdL+ZHxMMt1p+GxvC9\nnrbF1Zv5mqftUNIybh6/nR3p4eHAiLg6Ir5TvtZvsljzrbtp6GaZdVt2q/VgIO37GkrUeX+62QcW\nMxp0H+A/gQfz51BgFPAIcCbwEHADsGwedgIwNnffDIzO3S8D3wKmAncCa+T0kcAVpL9a3AN8oEX5\nLwNPAN8FZgKvkFqd2Rs4FjgM+AbwD2Ah8DpwF/CjPOzCPP4IYMXcPxF4DvgL8K5c14/lcZ8DXiD9\n6f8o4LScPheYB9wCXADcDrwK3ATcTwqUMwrDXZrrPzzX42/5t0tJAeCVnNeDwHzgD8ADwBF5Pt2R\n6zUv938n5/eFPK9eBh7L8/PB
vDwOI62sp+VhP5XrNo/0/82HgcuB6Xne3wz8S+4/Nw/zN+C4vCyP\nA64CvgO8mOfdPOCXef4sKMzzycCT+fcX8vfteX4GMAuYket1FvAn4DfAn/NymAZMAp4itapxaV6m\nU4DvAWsCt+b+B4EPtlhXphXG+SCweZ4/0/J0rFxYN7+d5//XWuRzZmEaniWtazvk5fRM/uyfl8Fr\neZ7dn6fzX4EN8vgv5zzm5Hm0AHgceL44bbnMw/NynQYcl9NG0WZba6rvfwFfzd0/BH6bu3cALszd\nT+T5+n+5nq/nz8Ok9auxPF/Iy+xV0vp4E2n9eDKPPxy4Jo8zg7TOTM/jPpeH/xRpG7wfuJGu7X1/\nutbNY0nr64bAfYVp2Qi4t8U0bp+XxYI8TxvrxYO5rvcD99K1/b2Up28C8N/A7Lw8HsrD3ZDndQCX\n5Xk9nbRubAz8NeezIC/jJ/P3P/L3scDvgM/kfB/NdVqWRfd9Y0jb+1RgUov50OM+sLD8vgvcnT/v\nLM7Hpn3D1JzncsAKpG1sWB5mxZzXMFrsr0lP+U/I8/UB4D+Asc3T2DZe9HfAqiAAbplnxHBg+bwC\nvS+vXJvnYS4DPtNDEAzgU7n7f4GjcvfFwHa5e13gkW6C4MWk4DUur8BPAt8n7SD+AFych1+FtHG8\nAPwTaef+RF4pxpNW7KtyXc8k7ZBvBn5O2pgaZQRpZ7Y/aee3A2kFfxb4I6kJuIWkHeI7SEfB6+U6\nrE7aWN5L2inOAf4r/7ZSLm8BXRvAX3M918h12IPUKPlfgYca05W/Vy3M3/uBrwAfyvVqDoIrkzbu\nAE4ETgLOIf2l5dCcx0G5DquQNqjDc/qvSCv/H0k7h1F5nu4DfCnn+c1c54WkIHV67t6ZtL7MAw7O\ny/Ay4NeFIPhinhfXkJqRWgf4HOngYLVc3oOF9eBrwDdy91BghRbrSvM404AP5+7jgZML8+4n3az3\n40jrxtA87PtJwXkGKeCcX5h/TwNfyeMtJB0gfDIvxyOBU4GT8/y6HfhAi3ruSHp0XqQrSr/Oy3QU\nbba1pvpuA/wid99G2kkOA44BvtgUBLfLddmUtJOcn+sapO1mNnB2/l4t1+2Kwvj/mufNmqRtcANg\n6byszyqsd42n5T8PnNQuCObumwrT+O3G/GwRBOeTdsRDSTv6qbn7ybwc1sz1Wwhsm6frd8Ahefpm\n0tVsWWP6FgLvZNEgeGqe5jvzPNqGtL3+NddlLnBeoW43k/d1TQFlJGm9Wb9pGy7Ohx73gYXl11j/\n96NrWyrOx1ULw3+TrvXyXGD33D2+sDwm8OYguCUwsZDPiFbT2O4zGC+HbgdcFRFzI+Jl4ErSEfaf\nI2JKHuZe0grUnVdJG3bz8B8DTpM0hfQn+xUlrdAmj3nAJaSVZkvSUfxapEB3IfARSU+RjoT+nbTz\nOJu0M1mDFLwPoOsIF9LZSKMum5OO8DYAPkLaOObk354HXoyIf5COzP9M2iD/QgooW5F2YBMk/SOn\nv420IT6Qu9eX9MGIeBG4jrSzm0hqcFx5WublOizI8+ZMcjN0ETE712UTSbeRVti3AxtHxK3AMrmc\norVJO+zXgD1JR7gX5t/2y9+7kjaSfwO+SDry25h05gzprHdZ4AekwPYr0tljAH+KiGdy9+9yXgIO\nBD6R58M7cz6Nchtm5HmxNekscr1c1jK0dg9wgKRjgU0jYk6b4QCQtBJpA74lJ51HWhcaft7N6OuR\n5s9M0nqxHWmZv57Ha87ryvwddAWHpUhXGeYCHycdFC1DWm+a7Zg/95POGt5FOiOCctvavcCWeduZ\nT7qKMJq0rd7WYvgFpOVxB2k9fCqn/Yy03k8lHfhCOjA5tzDuA6R188fAwxHxeES8mqezYW3gekkP\nkA6qNm5Rh6KzSMt2KOms++I2w00BXouIBeQz8ty9kLSN3k7aZkRa75cinZFulus3lLQ+LgBmRU
Sj\nDc/mS6R3kObdCqT17gzSfFqYf3+NFJR6sg1wa0T8GRbZhot6sw+8pPDdqnXtTSTdluf7vnTN97NI\n+z7y97ktxm14HNhA0qmSxpDOqEsbjEGwVdugsOiGvICeGwp4LfLhRNPwQ4BtI2Lz/FkrIuZIul7S\nFElntckvmvr/Rgpup5N2XHeRduIHR8SGpAW5HGkjaFy+W450FrUN6UyuMa2bkY4Ah5CO4D8KrApc\nVLgx3Ch/bqH/ENKOZ92IWDb/tkJE/DHXaSpwoqSjSZeIIB2pP046M94x16GR9yLzXtK5eUO5jnR2\nNZm0M24OfKsAe+dhryDtDGaSAlxj2FdIO5ERwCakwH5Yzu8HpLOzxr3Ls/OwjUt61xTrJmn9PK+O\nIm14s/Kw3yRdeomc18nABwsP4iwo1LnHdSgH+g+Rzo4vkLSfpD3yejKl+IBPSXNz/YcW8jg+T89+\npED0n6R1fZvm8ZoUt4elSOvbq6T1aGPSJcfI3a3qKeDEwnbwzog4u0XeC4ClJK1TqPOXIqKxUz6A\nFAhuIx3IbUhatkXvo2u724yu5TovfyvXfwEp+G9NWucgnbFdBvyCdIDzvrw+NzuVdKazKYuud+1c\nQbp68EnSpdDnJG1dmMZd83DN233D20hXXyaQ1o8XSZcGXyWdEUeezuVJB8iPNOU1JH/WIS2fd5HW\n8/VIBwOHFuZNQ3H9bae4PbfTm31gtOlumEDa521KupXxNoCI+D0wStKHgaER0fZhu4h4nrQPvJl0\nlajdPrjtxAw2twK7S1pO0nDSZbpWR5Z9dQNphw6ApM0BImKnvEJ8vjDsMNJR4j6kI9/GDvGPpA1t\nOGmBnZl/GwosI2kYaWVYhUWPgF4h3TecTLqUMoW0AxtC147uvcBvSSv84aSjzXcB65OO6oeSLtPc\nQ7r0MQ94TtKeuT7kwBmke3HfB7ag60z0LNKZxT6SNs3TuHRh3nyBrg3vaxGxea73zJy+by5ju1z2\nPNJlnp/nYeeSDhDWJQU5gE+TztrOIt3HvJG0c5hL2uEuT9ohNbxECmw75unfnHSQIdKR8np5uJtJ\nZ+UjSZfNz8jdj+V63Q3cFhGTWdTtdN1w3zcPC2mn9cYRsaT1gGcj4kxSYN4iIq4q7DwWyTefZT4v\n6YM56bOkqwc0DbegkMfRpMA9P8+3iaTlvBHpDGypprzm0Hq7X5m0nFYEfkpa5svlPDdpnjbS1YTP\nSVo+T+taklZvkW+jzk8V6tx4IvNW0jK+lbSNfgmYUjj4bBia83hF0rtIl6RbmUU6M7wsn21BCqy7\nAEeTbmsI2LqwjQ3Lw61E2jYhXVruVkTMy/PgdPI2GhF3Faax0RTjZsCI/BDaSGD1fPY4hLTePkEK\nViuR5vkypEB+D+mqyTDS5dI8tjEqAAAGKklEQVQXgFUlrUo6KP4wXVd+JpOCSeO+4i9Jl8OH0j6Y\nNy/PhjuAD+cDKySt0mKY3uwD9y58t3p/0wrAzLw89m367XzSGWR3Z4Hkp2yHRMQVwP+Q5iO0n8ZF\nDLpm0yLiPkkTSDswSDvO55dgEV8FfixpGmn+3UraeFuZTrov9zPS5Zv/Iu3E/0baCDbP3a+RVtxN\nSDuD10hBbgPSSnBim/zPJG0gfyNduoL0wM0qpKB7MV2XVx4l3VvbgHTk9bSk75GOxOfSdTYE6d7L\nmqSj6HnAlyPiVUnzSTvJXUmXQn9DCmBHAyeQdqKPANvls7prSU9Z/k+uw8p5nuxCOnpt1WbrscBP\ncl5rky4FPUfa2bxG2rCvjoipku4nPf02C/g96cga0oq/Ya7ra6T7e1uQdhrfy98LSAcLw3L3cXnc\nW0nLeC7psk8jwBV9lbTjOZu0E5sNkM8Gfq/0F5HrSDfqD5fUqMN+LfJqNg74qdKj/o/TdUmorTwv\n/kpaz+fn6buCtC5dm6fpTlJwm0F68OEmSVsVsnk7KegdRd
oRN86yXgB2Iu2Y72xMW0QcLundwB3p\nYVteJj1wUeZso+E20vp6R0TMlTSP1ges1wLfy7//hXTW1MoP8mf7pvRN6Vruc0nb2Y2kQLOrpJtI\n690v8ny8k3TQ2JOLSJfsb+hmmDtI6+IrpOcAJpKusIwgbZOHkQ6oFpAeMAvSvfLrSdvShqRguYC0\nLd1C2necxaKveNubtC8YTrqneXue3sfyMmsOhhNI69k/KFymjIhZSq+SuzIH7mdJl8aLerMPXEbS\nXbnOn27xe2Pf8BfSgXYxaF1EujpzSYvxitYCzi087X5kq2nMt4berKebhv70+QGdJ4DVFmP8scAF\nPQyzDLBU7t6WdBRd5TQdBpywGOPfTIkb1TQ9hFFIH006MytT1mnAgSWG2558w77un06vTxXUv/T6\nsYTK63Z7KLtudTcc6bmEHfp73vZx/lS+D1wSn0F3JjgYSDqVdHlvlx4GXRe4LB8BvUq6FFlVna4i\nHZV+tKoyeij/CODLvPmSSathG4+df63qeg0yHVuflrTerB9LqLxKtwelP8jfDUyNiElVlPFW1ot9\n4OKXlSOumZlZ7QzGB2PMzMxKcRA0M7PachA0M7PachA0G8RUsiV9s7pyEDR7C+rrmyJa2J8yLemb\n1ZSDoNkSIGm4pGskTVV6D9zeknaQdL+kBySdI2mZPOwb75KTNFrSzbn7WElnSLoBOD83j/b9PP40\nSV/Jw20p6RZJ9+amqtZsU6expP/OXZSbs/pEfrS/8fvHJV2Zu1+WdJKk+yRNkjQyp28o6Te5rNty\niy1mg4aDoNmSMQZ4OiI2i4hNSK3pTAD2jtQu4lKk/7H1ZEtgt4j4d1Lr+esD74uI95KC2TBSO5dj\nI2JLUitA32qVUURcTmrZZt9ITdJdC7y7EeBYtGHi4aTXA21BapXkmJx+Bqll/y1Jfw7/Sam5YTZA\n+M/yZkvGA8D3JX2X1MrHS6S3Kfwx/34eqXHfk3vI5+roat7pY8BPI+J1SC36S9qE1OzXxNxc2VBS\nu6w9ioiQdAHwGUnnklqFaTTltpCut1RcSGo2a3lSG5S/yGVB+zdmmA1IDoJmS0BE/FHSlqQWLk6k\n+/YkX6frKkxzm47FNz60atFfpPc1tnotTRnnkl5SO4/0Pr/X2wwXuY4v5LNIs0HJl0PNloD8BOYr\nEXEh6c0b7ye9CqbxbsLiGyGeIF32hNTocTs3AF9qPCSTW/R/FBgpaducNkxSd+++W6Ql/Yh4mtQ4\n+1Gky7UNQ0htNUJ6dc/vIuIl4M+S9splSdJm3ZRlNuA4CJotGZsCd+e3Z3yDFGQOIF1KfIB0ubHx\nCqHjgB8pvWi4u7cunEV62e00SVOBf4/0MtixwHdz2hRSwG1nAqkl/SmSls1pFwFPRcTDheHmAhvn\ndlc/SnqrPaS2OA/MZT1EeqGy2aDhtkPNakbSacD90fUSXCS9HBHL92O1zPqFg6BZjRTesPHxiJhf\nSHcQtFpyEDQbBCT9GPhAU/KPIqLbt3Kb1Z2DoJmZ1ZYfjDEzs9pyEDQzs9pyEDQzs9pyEDQzs9py\nEDQzs9r6/2/3XOJS+LYQAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x25be929ab00>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(df_train_merged['source_type']);\n",
    "plt.xlabel('source_type');\n",
    "plt.ylabel('Number of occurrences');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#这两处缺失值太多，舍弃处理\n",
    "df_train_merged.drop(['gender', 'lyricist'], axis = 1, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    7.377418e+06\n",
       "mean     7.511399e+00\n",
       "std      6.641625e+00\n",
       "min      1.000000e+00\n",
       "25%      1.000000e+00\n",
       "50%      5.000000e+00\n",
       "75%      1.300000e+01\n",
       "max      2.200000e+01\n",
       "Name: city, dtype: float64"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train_merged['city'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaEAAAEKCAYAAAC7c+rvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xu4HFWZ7/HvjwQQUUiAwGCCBpzo\nERmNuEWUEZGMEJAhgODgUYnATJRDEBz1yMUjCl5ARJRB46AEghcChlvEaIgZLs6MhIR7wkU2EGGT\nmASCEEBhAu/5o1aTSqcvVTvpXUn693mefnb1qvXWWt279n67Vq2uUkRgZmZWhU2q7oCZmXUvJyEz\nM6uMk5CZmVXGScjMzCrjJGRmZpVxEjIzs8o4CZmZWWWchMzMrDJOQmZmVpnBVXdgfbfddtvFyJEj\nq+6GmdkG5bbbbnsiIoa1q+ck1MbIkSOZN29e1d0wM9ugSPpjkXoejjMzs8o4CZmZWWWchMzMrDJO\nQmZmVhknITMzq4yTkJmZVcZJyMzMKuMkZGZmlXESMjOzyviKCQUtm/TT0jHDjvt4B3piZrbx8JGQ\nmZlVxknIzMwq4yRkZmaVcRIyM7PKOAmZmVllOpaEJO0k6QZJ90laIOnEVP4VSY9LujM9DszFnCKp\nV9IDkvbPlY9NZb2STs6V7yxpjqQHJV0uabNUvnl63pvWj2zXhpmZDbxOHgmtBD4XEW8B9gSOl7Rr\nWndeRIxOjxkAad2RwFuBscAPJA2SNAj4PnAAsCvw0dx2zk7bGgU8BRybyo8FnoqIvwXOS/WattG5\nt8DMzFrpWBKKiMURcXtaXgHcBwxvETIOmBoRL0TEI0AvsEd69EbEwxHxIjAVGCdJwL7AtBQ/BTgk\nt60paXkaMCbVb9aGmZlVYEDOCaXhsHcAc1LRREl3S5osaWgqGw48lgvrS2XNyrcF/hwRK+vKV9tW\nWv90qt9sW/X9nSBpnqR5y5YtK/16zcysmI4nIUmvAa4EToqIZ4BJwBuB0cBi4Nxa1Qbh0Y/y/mxr\n9YKICyOiJyJ6hg0b1iDEzMzWhY4mIUmbkiWgn0XEVQARsSQiXoqIl4EfsWo4rA/YKRc+AljUovwJ\nYIikwXXlq20rrd8aWN5iW2ZmVoFOzo4TcBFwX0R8J1e+Y67aocD8tDwdODLNbNsZGAXcCswFRqWZ\ncJuRTSyYHhEB3AAcnuLHA9fmtjU+LR8O/Eeq36wNMzOrQCcvYLoX8AngHkl3prJTyWa3jSYbBlsI\nfAogIhZIugK4l2xm3fER8RKApInATGAQMDkiFqTtfRGYKulrwB1kSY/08yeSesmOgI5s14aZmQ08\nZQcI1kxPT0/MmzfPV9E2MytB0m0R0dOunq+YYGZmlXESMjOzyjgJmZlZZZyEzMysMk5CZmZWGSch\nMzOrjJOQmZlVxknIzMwq4yRkZmaVcRIyM7PKOAmZmVllnITMzKwyTkJmZlYZJyEzM6uMk5CZmVXG\nScjMzCrjJGRmZpVxEjIzs8o4CZmZWWWchMzMrDJOQmZmVhknITMzq4yTkJmZVcZJyMzMKuMkZGZm\nlXESMjOzyrRNQpK2lLRJWn6TpIMlbdr5rpmZ2cauyJHQzcCrJA0HZgNHA5d0slNmZtYdiiQhRcTz\nwGHAv0XEocCune2WmZl1g0JJSNJ7gI8Bv0plgzvXJTMz6xZFktBJwCnA1RGxQNIuwA3tgiTtJOkG\nSfdJWiDpxFS+jaRZkh5MP4emckk6X1KvpLsl7Z7b1vhU/0FJ43Pl75R0T4o5X5L624aZmQ28tkko\nIm6KiIOBC9LzhyPiMwW2vRL4XES8BdgTOF7SrsDJwOyIGEV2junkVP8AYFR6TAAmQZZQgNOBdwN7\nAKfXkkqqMyEXNzaVl2rDzMyqUWR23H
sk3Qvcl56/XdIP2sVFxOKIuD0tr0jxw4FxwJRUbQpwSFoe\nB1wamVuAIZJ2BPYHZkXE8oh4CpgFjE3rtoqI30dEAJfWbatMG2ZmVoEiw3HfJUsETwJExF3A3mUa\nkTQSeAcwB9ghIhanbS0Gtk/VhgOP5cL6Ulmr8r4G5fSjDTMzq0ChL6tGxGN1RS8VbUDSa4ArgZMi\n4plWVRs13Y/ylt0pEiNpgqR5kuYtW7aszSbNzKy/iiShxyS9FwhJm0n6PGlorp30pdYrgZ9FxFWp\neEltCCz9XJrK+4CdcuEjgEVtykc0KO9PG6uJiAsjoicieoYNG1bkpZqZWT8USUKfBo5n1fDX6PS8\npTRT7SLgvoj4Tm7VdKA2w208cG2u/Kg0g21P4Ok0lDYT2E/S0DQhYT9gZlq3QtKeqa2j6rZVpg0z\nM6tA2+/7RMQTZN8RKmsv4BPAPZLuTGWnAmcBV0g6FngUOCKtmwEcCPQCz5NdmYGIWC7pTGBuqndG\nRCxPy8eRXb1hC+DX6UHZNszMrBptk5CkKcCJEfHn9HwocG5EHNMqLiL+k8bnYADGNKgfNDnCiojJ\nwOQG5fOA3RqUP1m2DTMzG3hFhuPeVktAAGma9Ds61yUzM+sWRZLQJrkvh9a+POrL9piZ2VorkkzO\nBf5b0rT0/Ajg653rkpmZdYsiExMulXQb8AGyczyHRcS9He+ZmZlt9IoOq90PPFWrL+n1EfFox3pl\nZmZdocjsuBPILiC6hOxKCSK7ysDbOts1MzPb2BU5EjoReHOa9mxmZrbOFLpsD/B0pztiZmbdp8iR\n0MPAjZJ+BbxQK6y7FI+ZmVlpRZLQo+mxWXqYmZmtE0WmaH8VQNKWEfFc57tkZmbdomN3VjUzM2tn\nQO6samZm1kjH76xqZmbWTJGJCavdWRX4DAXvrGpmZtZKx+6samZm1k7LIyFJg4BPRER/7qxqZmbW\nUssjoYh4CRg3QH0xM7MuU+Sc0H9JugC4HHjle0IRcXvHemVmZl2hSBJ6b/p5Rq4sgH3XfXfMzKyb\ntDsntAkwKSKuGKD+mJlZF2l3TuhlYOIA9cXMzLpMkSnasyR9XtJOkrapPTreMzMz2+gVOSd0TPqZ\n/25QALus++6YmVk3KXIV7Z0HoiNmZtZ92iYhSUc1Ko+IS9d9d8zMrJsUGY57V275VcAY4HbAScjM\nzNZKkeG4E/LPJW0N/KRjPTIzs65R6FYOdZ4HRq3rjpiZWfcpck7ol2Sz4SBLWrsC/vKqmZmttSLn\nhL6dW14J/DEi+jrUHzMz6yJFhuMeBeZExE0R8V/Ak5JGtguSNFnSUknzc2VfkfS4pDvT48DculMk\n9Up6QNL+ufKxqaxX0sm58p0lzZH0oKTL0w33kLR5et6b1o9s14aZmVWjSBL6BfBy7vlLqaydS4Cx\nDcrPi4jR6TEDQNKuwJHAW1PMDyQNSvcz+j5wANkw4EdTXYCz07ZGAU8Bx6byY4GnIuJvgfNSvaZt\nFHgdZmbWIUWS0OCIeLH2JC1v1i4oIm4GlhfsxzhgakS8EBGPAL3AHunRGxEPp3anAuMkiewq3tNS\n/BTgkNy2pqTlacCYVL9ZG2ZmVpEiSWiZpINrTySNA55YizYnSro7DdcNTWXDgcdydfpSWbPybYE/\nR8TKuvLVtpXWP53qN9vWGiRNkDRP0rxly5b171WamVlbRZLQp4FTJT0q6VHgi8Cn+tneJOCNwGhg\nMXBuKleDutGP8v5sa83CiAsjoicieoYNG9aoipmZrQNFvqz6ELCnpNcAiogV/W0sIpbUliX9CLgu\nPe0DdspVHQEsSsuNyp8AhkganI528vVr2+qTNBjYmmxYsFUbZmZWgbZHQpK+IWlIRDwbESskDZX0\ntf40JmnH3NNDgdrMuenAkWlm285kX4a9FZgLjEoz4TYjm1gwPSICuAE4PMWPB67NbWt8Wj4c+I9U\nv1
kbZmZWkSLfEzogIk6tPYmIp9LU6i+1CpJ0GbAPsJ2kPuB0YB9Jo8mGwRaShvUiYoGkK4B7yb6L\ndHxEvJS2MxGYCQwCJkfEgtTEF4GpKSHeAVyUyi8CfiKpl+wI6Mh2bZiZWTWKJKFBkjaPiBcAJG0B\nbN4uKCI+2qD4ogZltfpfB77eoHwGMKNB+cM0mN0WEX8FjijThpmZVaNIEvopMFvSxWRHMMewagq0\nmZlZvxWZmPAtSXcD/5CKzoyImZ3tlpmZdYMiR0KQnXPZlOxI6I7OdcfMzLpJkdlxHyGbRXY48BFg\njqTDW0eZmZm1V+RI6DTgXRGxFEDSMOC3rLpkjpmZWb8UuWLCJrUElDxZMM7MzKylIkdCv5E0E7gs\nPf8nGkyZNjMzK6vI7LgvSDoM+Huy669dGBFXd7xnZma20Ss0Oy4irgKu6nBfzMysy/jcjpmZVcZJ\nyMzMKtM0CUmanX6ePXDdMTOzbtLqnNCOkt4PHCxpKnU3hYuI2zvaMzMz2+i1SkJfBk4mu/nbd+rW\nBbBvpzplZmbdoWkSiohpwDRJ/y8izhzAPpmZWZco8j2hMyUdDOydim6MiOtaxZiZmRVR5AKm3wRO\nJLsj6b3AianMzMxsrRT5suqHgNER8TKApClkt3M4pZMdMzOzjV/R7wkNyS1v3YmOmJlZ9ylyJPRN\n4A5JN5BN094bHwWZmdk6UGRiwmWSbgTeRZaEvhgRf+p0x8zMbONX9AKmi4HpHe6LmZl1mUJJyMw2\nXAdN+0XpmOsOP6IDPTFbky9gamZmlWmZhCRtImn+QHXGzMy6S8sklL4bdJek1w9Qf8zMrIsUOSe0\nI7BA0q3Ac7XCiDi4Y70yM7OuUCQJfbXjvTAzs65U5HtCN0l6AzAqIn4r6dXAoM53zczMNnZFLmD6\nL8A04N9T0XDgmk52yszMukORKdrHA3sBzwBExIPA9u2CJE2WtDQ/u07SNpJmSXow/RyayiXpfEm9\nku6WtHsuZnyq/6Ck8bnyd0q6J8WcL0n9bcPMzKpRJAm9EBEv1p5IGkx2Z9V2LgHG1pWdDMyOiFHA\n7PQc4ABgVHpMACaltrYBTgfeDewBnF5LKqnOhFzc2P60YWZm1SmShG6SdCqwhaQPAr8AftkuKCJu\nBpbXFY8DpqTlKcAhufJLI3MLMETSjsD+wKyIWB4RTwGzgLFp3VYR8fuICODSum2VacPMzCpSJAmd\nDCwD7gE+BcwAvtTP9nZI16GrXY+uNqw3HHgsV68vlbUq72tQ3p82zMysIkVmx72cbmQ3h2wY7oF0\n9LEuqVHT/SjvTxtrVpQmkA3Z8frX+3u6ZmadUmR23IeAh4DzgQuAXkkH9LO9JbUhsPRzaSrvA3bK\n1RsBLGpTPqJBeX/aWENEXBgRPRHRM2zYsFIv0MzMiisyHHcu8IGI2Cci3g98ADivn+1NB2oz3MYD\n1+bKj0oz2PYEnk5DaTOB/SQNTRMS9gNmpnUrJO2ZZsUdVbetMm2YmVlFilwxYWlE9OaeP8yqo4um\nJF0G7ANsJ6mPbJbbWcAVko4FHgVq14ufARwI9ALPA0cDRMRySWcCc1O9MyKiNtnhOLIZeFsAv04P\nyrZhZmbVaZqEJB2WFhdImgFcQXYO5QhWJYWmIuKjTVaNaVA3yL6P1Gg7k4HJDcrnAbs1KH+ybBtm\nZlaNVkdC/5hbXgK8Py0vA4auWd3MzKycpkkoIjxcZWZmHdX2nJCknYETgJH5+r6Vg5mZra0iExOu\nAS4iu0rCy53tjpmZdZMiSeivEXF+x3tiZmZdp0gS+p6k04HrgRdqhRFxe8d6ZWZmXaFIEvo74BPA\nvqwajov03MzMrN+KJKFDgV3yt3MwMzNbF4okobuAIRS4SoKtv373o4NK1X/fv1zXoZ6Yma1SJAnt\nANwvaS6rnxPyFG0zM1srRZLQ6R3vhZmZdaUi9xO6aSA6YmZm3afI
FRNWsOrmb5sBmwLPRcRWneyY\nmZlt/IocCb02/1zSIcAeHeuRmZl1jSI3tVtNRFyDvyNkZmbrQJHhuMNyTzcBelg1PGdmZtZvRWbH\n5e8rtBJYCIzrSG/MzKyrFDkn5PsKmZlZR7S6vfeXW8RFRJzZgf6YmVkXaXUk9FyDsi2BY4FtASch\nMzNbK61u731ubVnSa4ETgaOBqcC5zeLMzMyKanlOSNI2wL8CHwOmALtHxFMD0TEzM9v4tTondA5w\nGHAh8HcR8eyA9crMzLpCqy+rfg54HfAlYJGkZ9JjhaRnBqZ7Zma2MWt1Tqj01RTMzMzKcKIxM7PK\nOAmZmVllnITMzKwyTkJmZlYZJyEzM6uMk5CZmVWmyK0c1jlJC4EVwEvAyojoSVdnuBwYSXa7iI9E\nxFOSBHwPOBB4HvhkRNyetjOe7HtMAF+LiCmp/J3AJcAWwAzgxIiIZm10+OWuEw9eUO7uGaMmXtuh\nnpiZrTtVHgl9ICJGR0RPen4yMDsiRgGz03OAA4BR6TEBmASvXFLodODdZLcbP13S0BQzKdWtxY1t\n04aZmVVgfRqOG0d2fTrSz0Ny5ZdG5hZgiKQdgf2BWRGxPB3NzALGpnVbRcTvIyKAS+u21agNMzOr\nQFVJKIDrJd0maUIq2yEiFgOkn9un8uHAY7nYvlTWqryvQXmrNlYjaYKkeZLmLVu2rJ8v0czM2qnk\nnBCwV0QskrQ9MEvS/S3qqkFZ9KO8sIi4kOzCrfT09JSKNTOz4ipJQhGxKP1cKulqsnM6SyTtGBGL\n05Da0lS9D9gpFz4CWJTK96krvzGVj2hQnxZtmFmHfPjKuaVjrvzwuzrQE1sfDXgSkrQlsElErEjL\n+wFnANOB8cBZ6Wdtetd0YKKkqWSTEJ5OSWQm8I3cZIT9gFMiYnm60veewBzgKODfcttq1IaZmTWw\n9IKZpepvP3H/UvWrOBLaAbg6m3nNYODnEfEbSXOBKyQdCzwKHJHqzyCbnt1LNkX7aICUbM4Eah+z\nzoiI5Wn5OFZN0f51ekCWfBq1YWZmFRjwJBQRDwNvb1D+JDCmQXkAxzfZ1mRgcoPyecBuRdswM7Nq\nrE9TtM3MrMs4CZmZWWWchMzMrDJOQmZmVhknITMzq4yTkJmZVcZJyMzMKuMkZGZmlXESMjOzyjgJ\nmZlZZZyEzMysMk5CZmZWmapuamcbmGsmH1Cq/iHH/Lp9JTPrej4SMjOzyjgJmZlZZTwcZ2YtjZtW\n7s6aANceXu7umta9fCRkZmaVcRIyM7PKOAmZmVllnITMzKwyTkJmZlYZJyEzM6uMp2ibrecOuvLi\n0jHXffjoDvTEbN3zkZCZmVXGScjMzCrjJGRmZpXxOSHb6B199djSMRcf+psO9MTM6jkJ2Xrv7Knl\nr0P2xSPLX+/MrJHZP19WOmbM/x7WgZ5snJyEzNo48JqTS8fMOOSsV5Y/dNV3S8f/6rCTSseYNbLk\nu3NL1d/hpHd1qCeNOQkNkCWTvlGq/g7HndqhnpiZrT+6MglJGgt8DxgE/DgizmoTYmYbqClXlR9O\nG3+Yh9MGStclIUmDgO8DHwT6gLmSpkfEvdX2bON20aX7lap/7FHXd6gnZrY+6cYp2nsAvRHxcES8\nCEwFxlXcJzOzrtR1R0LAcOCx3PM+4N0V9cXM2vjM1Y+1r1Tn/EN36kBP+mf+vy8pHbPbp3Z4ZflP\n5/yxdPzffOENpWOqooioug8DStIRwP4R8c/p+SeAPSLihFydCcCE9PTNwAMtNrkd8MRadMnxjt9Q\n4zfkvju+8/FviIi2J9e68UioD8h/TBoBLMpXiIgLgQuLbEzSvIjo6W9nHO/4DTV+Q+6746uPr+nG\nc0JzgVGSdpa0GXAkML3iPpmZdaWuOxKKiJWSJgIzyaZoT46IBRV3y8ysK3VdEgKIiBnAjHW0uULD\ndo53/EYYvyH33fHVxwNdODHB
zMzWH914TsjMzNYTTkIFSJosaamk+U3WS9L5knol3S1p97r1r5J0\nq6S7JC2Q9NUG29hc0uVpG3MkjWxQZ5CkOyRdVzZe0kJJ90i6U9K8fryGIZKmSbpf0n2S3lM0XtKb\nU7u1xzOSTioan9Z/Nr138yVdJulVRV+/pBNT3IL6dpu13eh3LmkbSbMkPZh+Dq3fVqo3XtLTklZK\n6suVH5H68LKkprOKJI3Nxf8pV35Oev/vlnS1pCEl489MsXdKul7S68r0P7f+85JC0nZN4m9OsS9I\nGp/KviLp8dw+cGCZvqd1J0h6IL2H3yr52i/Ptb1Q0p0l40dLuiXFz5O0R5n3TtLbJf1e2d/gLyVt\n1aL9hyQ9n/a/BZJOTOuK7n+fTfEvSurLxRfd/z6ei1+Siy+z/z2Q/p7aX/03Ivxo8wD2BnYH5jdZ\nfyDwa0DAnsCcuvUCXpOWNwXmAHvW1fk/wA/T8pHA5Q3a+Vfg58B1Dda1jAcWAtu1eI3tXsMU4J/T\n8mbAkDLxuXqDgD+RfYegUDzZF4wfAbZIz68APlnk9QO7AfOBV5OdA/0tMKpd241+58C3gJPT8snA\n2Q1e3zbAw8CHgPcDLwJD07q3kH3v7Eagp8X78xDwT2RX9/gLsGtatx8wOC2f3aT9VvFb5ep9pvZ+\nFe1/Wr8T2aSePzban1L8ImAf4N60raHAV4DPt/k7a9X3D6Tf3ebp+fZl4uvqnQt8uWT71wMH5PaX\nG0v+7ucC70/LxwBntmj/3an9u4Ae4A/AriX2vz+m938o2d9Nb4ovuv8tBP6R7O/8nvR815L73y4p\n/q5Gv4P8w0dCBUTEzcDyFlXGAZdG5hZgiKQdc/EREc+mp5umR/3JuHFk/+gBpgFjJKm2UtIIsp37\nxy360DS+gKavIX1q2xu4KL2eFyPiz0Xj64wBHoqI+q+Bt4sfDGwhaTBZQlnUIL7R638LcEtEPB8R\nK4GbgEPbtQ08yJq/83wbU4BDGry+/YFZEfErsn8GzwJjASLivoho9cVnWHVZqcuBpcDTqV0i4vr0\nGgBuIfuOW5n4Z3L1tmTNfbBl/5PzgP/bJLYW/0uyf1wvA7Pq4ltp2nfgOOCsiHghvZalJeOB7KgX\n+AhwWcn4AGpHL1uz5v4Hrd+7NwM3p+VZwIdbtD8nIm4lu6TYB4H7yD6IFd3/fhMRN0bEU2TJ88/A\n8BL73wMR8cvILmv2c+D5FF9m/yt8WTQnoXWj0aWAhucrKBtKu5Ns554VEXOabSP9op8Gts2t/y7Z\nH//L7frQJD6A6yXdpuyKEGVewy7AMuBiZcOBP5a0ZYn4vCNp/A+gaXxEPA58G3gUWAw8HRH1Vzht\n9vrnA3tL2lbSq8k+xdZf06Vo33eIiMWpjcXA9gVex/802VYzReOPITt6KxUv6euSHgM+Bny5TLyk\ng4HHI+KuEv3Pv5cT01DO5CZDSa36/ibgfcqGWm+S1OimN0Xeu/cBSyLiwZLxJwHnpPfu28ApJePn\nAwen5SNYcx9sFN8H/C/gHWRH5/3Z/54F3pjii6iP/2vqa3180f2v2d/SK5yE1o1GRxyrfVKMiJci\nYjTZp4c9JO1WdBuSDgKWRsRta9GHvSJid+AA4HhJe5eIH0w2NDUpIt4BPEc2HFCmfZR9Ofhg4Bdl\n+p/+YY0DdgZeB2wp6eNF4iPiPrKhg1nAb8iGB1bW1Wvb9xLWdltF3sfTyF7Dz8rGR8RpEbFTip1Y\nND4l8NNonLiKtD+J7J/haLIPEueWiIVsHxxKNlz6BeCKBkf6Rd77j9L4Q1C7+OOAz6b37rOkUYES\n8ceQ/d3dBryWbKiuXfzmZH+vJ9UdxbaSHz15DdmHvmvXIv4zwM35+LXZ/xpxElo32l4KqCYNY93I\nmkMUr2wjDTltzarhoL2AgyUtJDu83VfST0vEExGL0s+lwNVkh81FX0Mf0Jc7eptGlpSKxtccAN
we\nEY2u6Ngq/h+ARyJiWUT8D3AV8N5m8fWvPyIuiojdI2LvVFb/Kbjo729JbohyR7Kj2navY9Mm22qm\nZbyyE/0HAR+LNAjfz/Z/TuMhoWbxbyT7EHBX2g9HALdL+ps28SOARRGxJH0Qexn4EWvuf+363gdc\nlYZMbyUbEaifGNHuvRsMHAZc3qDtdvHjyfY7yD5Elep/RNwfEftFxDvJkuBDreIlbUo28jE3Imrt\nFt7/UvyVZMOCZe51Xx9/L/CftZX92P+a/i98RasTRn6sdsJtJM0nJnyI1U9s31q3fhjpRD6wBfA7\n4KC6Osez+on1K5q0tQ+NJyY0jScb/39tbvm/gbElX8PvgDen5a8A55SJT3WmAkeXfQ/JTtQuIDsX\nJLLx8BNKvP7t08/XA/eTO9Hequ363zlwDqufGP5Wg9exDdnJ4KHA28g+8W5TV+dGmp8YHkx2cntn\nYBTZyfG3pnVjyf4pDGuxn7aKH5WrdwIwrT/9T/UW0nxiwiMp9t60vA2wY67OZ4GpJfv+aeCMtPwm\nsiEfFY3PvX839fO9uw/YJy2PAW4r897l9sFNgEuBY9q0/1OyIfB8/8vsf1PJjj4f6ef+dxVwPtnI\nQX/3v83y8U1jWq3045U39jKyIYT/Icv0x6Y/ik+n9SK7Ud5DZLNJeuri3wbcAdxNNjb85VR+BnBw\nWn4V2SesXuBWYJcmfdmHlISKxpOd07krPRYAp6XyMq9hNDAvvYZr0h9amfhXA08CW+fKysR/lSyB\nzAd+QjZUUfT1/y798dwFjCnSdpPf+bbAbLIjqdms+gfTQ3aH3lp7xwAryIYsVubiD03LLwBLgJmp\n/uuAGbn4A4FnUuxLufhesn++d6bHD0vGX5nev7vJJg8ML9P/ut/HQlISahA/J8UF2ZHnsel3dk9q\nezopKZXo+2Zk/5jnA7cD+5aJT+suqf3Oc/WLtv/3wG1k+9Ac4J0lf/cnks1y+wNwFqsuFNCo/UfT\ne/en3O/6QIrvf99M8S+kbdXii+5/n8/FL87Fl9n//kD293Rau/+vvmKCmZlVxueEzMysMk5CZmZW\nGSchMzOrjJOQmZlVxknIzMwq4yRktgGR9GlJR6XlT6rJlbDNNhSeom22gZJ0I9mVqde4NYfZhsJJ\nyGw9lo56al8evJvsC4DPkn1Z9BLgcbJv9p9GdquNQ1PcB4HjIuKwge+1WXEejjNbT0l6K1ly2Tci\n3k72rXsAImIa2RUsPhbZhXFnAG+RNCxVORq4eIC7bFaak5DZ+mtfsuu7PQEQEU3vaRXZkMZPgI+n\nO16+h8aX2jdbrwyuugNm1pQodxuIi8muCfdX4Bex6gZkZustHwmZrb9mAx+RtC2ApG3q1q8guzcN\n8MrtOhYBXyI7X2S23vORkNlBL9coAAAAfUlEQVR6KiIWSPo6cJOkl8iuxL4wV+US4IeS/gK8JyL+\nQnajsWERce9A99esPzw7zmwjIukC4I6IaHTnT7P1jpOQ2UYi3Tr6OeCDEfFC1f0xK8JJyMzMKuOJ\nCWZmVhknITMzq4yTkJmZVcZJyMzMKuMkZGZmlXESMjOzyvx/83VEZB6nGEYAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x25bded0e358>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(df_train_merged['city']);\n",
    "plt.xlabel('city');\n",
    "plt.ylabel('Number of occurrences');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\ntotal = len(df_train_merged)\\nplt.figure(figsize=(15,19))\\n\\nplt.subplot(311)\\ng = sns.countplot(x=\"city\", data = df_train_merged)\\ng.set_title(\"City Count Distribution\", fontsize=20)\\ng.set_ylabel(\"Count\",fontsize= 17)\\ng.set_xlabel(\"City Names\", fontsize=17)\\nsizes=[]\\nfor p in g.patches:\\n    height = p.get_height()\\n    sizes.append(height)\\n    g.text(p.get_x()+p.get_width()/2.,\\n            height + 3,\\n            \\'{:1.2f}%\\'.format(height/total*100),\\n            ha=\"center\", fontsize=14) \\ng.set_ylim(0, max(sizes) * 1.15)\\n\\nplt.show()\\n'"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'''\n",
    "total = len(df_train_merged)\n",
    "plt.figure(figsize=(15,19))\n",
    "\n",
    "plt.subplot(311)\n",
    "g = sns.countplot(x=\"city\", data = df_train_merged)\n",
    "g.set_title(\"City Count Distribution\", fontsize=20)\n",
    "g.set_ylabel(\"Count\",fontsize= 17)\n",
    "g.set_xlabel(\"City Names\", fontsize=17)\n",
    "sizes=[]\n",
    "for p in g.patches:\n",
    "    height = p.get_height()\n",
    "    sizes.append(height)\n",
    "    g.text(p.get_x()+p.get_width()/2.,\n",
    "            height + 3,\n",
    "            '{:1.2f}%'.format(height/total*100),\n",
    "            ha=\"center\", fontsize=14) \n",
    "g.set_ylim(0, max(sizes) * 1.15)\n",
    "\n",
    "plt.show()\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAa0AAAEKCAYAAAChTwphAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xu8VmWZ//HPBYiIBw6KpqBhSlNa\nRro1rJl+mZOipqChY9MkGg5qWvarZtQOw5T1G20mLc3jJIllGYEHVJSIPIyNBzaeAZUtHiCJ81Fg\nc9jX74/7WqzF9tl7PxDP3iz5vl+v5/Wsda973etex2sd7mc95u6IiIiUQaeOroCIiEi1FLRERKQ0\nFLRERKQ0FLRERKQ0FLRERKQ0FLRERKQ0FLRERKQ0FLRERKQ0FLRERKQ0unR0BbZ3e+21l/fv37+j\nqyEiUirTpk1b5O59tnW5Clpt6N+/P/X19R1dDRGRUjGzN2pRrm4PiohIadQsaJlZNzN7ysyeM7Pp\nZva9SD/QzJ40s1lm9lsz6xrpO0d/QwzvXyjrskh/2cyOL6QPjrQGM7u0kL7F0xARke1fLa+0GoFP\nu/tHgIHAYDMbBFwJXO3uA4ClwIjIPwJY6u4HA1dHPszsEOBM4FBgMHC9mXU2s87AdcAJwCHA5yMv\nWzoNEREph5oFLU9WRe9O8XHg08C4SB8DDI3uIdFPDD/WzCzS73D3Rnd/DWgAjopPg7vPdvd1wB3A\nkBhnS6chIiIlUNNnWnFF9CywAJgMvAosc/cNkWUu0De6+wJzAGL4cmDPYnqzcVpK33MrpiEiIiVQ\n06Dl7hvdfSDQj3Rl9MFK2eK70hWPb8P01qaxGTMbaWb1Zla/cOHCCqOIiEhHaJfWg+6+DHgYGAT0\nNLOsqX0/4K3ongvsDxDDewBLiunNxmkpfdFWTKN5fW929zp3r+vTZ5v/zEBERLZSLVsP9jGzntG9\nC/D3wEzgIWBYZBsO3BPdE6KfGP5Hd/dIPzNa/h0IDACeAqYCA6KlYFdSY40JMc6WTkNEREqglj8u\n3hcYE638OgFj3f0+M5sB3GFmPwCeAW6J/LcAvzSzBtLVz5kA7j7dzMYCM4ANwIXuvhHAzC4CJgGd\ngdHuPj3KumRLpiEiIuVgutBoXV1dnbf2RoyFN960qbvP+ee1R5VERLZ7ZjbN3eu2dbl6I4aIiJSG\ngpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaI\niJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSG\ngpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJRGzYKWme1vZg+Z2Uwzm25mF0f6v5vZ\nn83s2ficWBjnMjNrMLOXzez4QvrgSGsws0sL6Qea2ZNmNsvMfmtmXSN95+hviOH925qGiIhs/2p5\npbUB+Ia7fxAYBFxoZofEsKvdfWB8JgLEsDOBQ4HBwPVm1tnMOgPXAScAhwCfL5RzZZQ1AFgKjIj0\nEcBSdz8YuDrytTiN2i0CERHZlmoWtNx9nrs/Hd0rgZlA31ZGGQLc4e6N7v4a0AAcFZ8Gd5/t7uuA\nO4AhZmbAp4FxMf4YYGihrDHRPQ44NvK3NA0RESmBdnmmFbfnPgo8GUkXmdnzZjbazHpFWl9gTmG0\nuZHWUvqewDJ339AsfbOyYvjyyN9SWSIiUgI1D1pmthswHviau68AbgAOAgYC84AfZ1krjO5bkb41\nZTWv80gzqzez+oULF1YYRUREOkJNg5aZ7UQKWLe7+50A7j7f3Te6exPw3+S35+YC+xdG7we81Ur6\nIqCnmXVplr5ZWTG8B7CklbI24+43u3
udu9f16dNna2ZdRERqoJatBw24BZjp7lcV0vctZDsVeDG6\nJwBnRsu/A4EBwFPAVGBAtBTsSmpIMcHdHXgIGBbjDwfuKZQ1PLqHAX+M/C1NQ0RESqBL21m22ieA\nLwIvmNmzkfYtUuu/gaTbcq8D5wG4+3QzGwvMILU8vNDdNwKY2UXAJKAzMNrdp0d5lwB3mNkPgGdI\nQZL4/qWZNZCusM5saxoiIrL9s3QBIi2pq6vz+vr6FocvvPGmTd19zj+vPaokIrLdM7Np7l63rcvV\nGzFERKQ0FLRERKQ0FLRERKQ0FLRERKQ0FLRERKQ02gxaZrarmXWK7veb2Snxo2EREZF2Vc2V1qNA\nNzPrC0wBzgFurWWlREREKqkmaJm7rwZOA65191NJfxEiIiLSrqoKWmZ2NPAF4P5Iq+WbNERERCqq\nJmh9DbgMuCteg/Q+0jv/RERE2lWbV0zu/gjwiJntGv2zga/WumIiIiLNVdN68Ggzm0H652HM7CNm\ndn3NayYiItJMNbcHfwIcDywGcPfngE/WslIiIiKVVPXjYnef0yxJf+chIiLtrppWgHPM7OOAx58w\nfpW4VSgiItKeqrnSOh+4EOhL+rv6gdEvIiLSrqppPbiI9BstERGRDlVN68ExZtaz0N/LzEbXtloi\nIiLvVM3twcPcfVnW4+5LgY/WrkoiIiKVVRO0OplZr6zHzHqj1ziJiEgHqCb4/Bj4XzMbF/2nAz+s\nXZVEREQqq6Yhxm1mNg04BjDgNHefUfOaiYiINFPtbb6XgKVZfjM7wN3frFmtREREKmgzaJnZV4BR\nwHzSmzAMcOCw2lZNRERkc9VcaV0M/I27L651ZURERFpTTevBOcDyWldERESkLdUErdnAw2Z2mZl9\nPfu0NZKZ7W9mD5nZTDObbmYXR3pvM5tsZrPiu1ekm5ldY2YNZva8mR1eKGt45J9lZsML6UeY2Qsx\nzjVmZls7DRER2f5VE7TeBCYDXYHdC5+2bAC+4e4fBAYBF5rZIcClwBR3HwBMiX6AE4AB8RkJ3ACb\nfhc2CvgYcBQwqvC7sRsibzbe4EjfommIiEg5VNPk/XsAZraru79dbcHuPg+YF90rzWwm6aW7Q4BP\nRbYxwMPAJZF+m7s78ISZ9TSzfSPvZHdfEvWYDAw2s4eBPdz98Ui/DRgKPLCl04i6iojIdq5d/rnY\nzPqTXv30JLBPFiTie+/I1pf0/CwzN9JaS59bIZ2tmIaIiJRAzf+52Mx2A8YDX3P3Fa1lrZDmW5He\nanWqGcfMRppZvZnVL1y4sI0iRUSkvdT0n4vNbCdSwLrd3e+M5Plx24/4XhDpc4H9C6P3A95qI71f\nhfStmcZm3P1md69z97o+ffpUM6siItIOqmryXvznYjP7JlX8c3G05LsFmOnuVxUGTQCyFoDDgXsK\n6WdFC79BwPK4tTcJOC7+EqUXcBwwKYatNLNBMa2zmpW1JdMQEZESqObHxecDPyV/hvR7qvvn4k8A\nXwReMLNnI+1bwBXAWDMbQWqZeHoMmwicCDQAq4FzANx9iZldDkyNfN/PGmUAFwC3AruQGmA8EOlb\nNA0RESmHVoOWmXUGvujuW/zPxe7+GJWfIQEcWyG/00IwdPfRwDv+eNLd64EPVUhfvKXTEBGR7V+r\ntwfdfSOpmbiIiEiHq+b24J/M7GfAb4FNv9Ny96drVisREZEKqglaH4/v7xfSHPj0tq+OiIhIy9p6\nptUJuMHdx7ZTfURERFrU1jOtJuCidqqLiIhIq6r5ndZkM/tmvLW9d/apec1ERESaqeaZ1pfiu9hU\n3IH3bfvqiIiItKyat7wf2B4VERERaUubQcvMzqqU7u63bfvqiIiItKya24NHFrq7kd408TSgoCUi\nIu2qmtuDXyn2m1kP4Jc1q5GIiEgLqvprkmZWk/6uXkREpF1V80zrXvI/SuwEHALox8YiItLuqnmm\n9V
+F7g3AG+4+t6XMIiIitVJN0HoTmOfuawHMbBcz6+/ur9e0ZiIiIs1U80zrd0BToX9jpImIiLSr\naoJWF3dfl/VEd9faVUlERKSyaoLWQjM7JesxsyHAotpVSUREpLJqnmmdD9wefwQJMBeo+JYMERGR\nWqrmx8WvAoPMbDfA3H1l7aslIiLyTm3eHjSz/2dmPd19lbuvNLNeZvaD9qiciIhIUTXPtE5w92VZ\nj7svBU6sXZVEREQqqyZodTaznbMeM9sF2LmV/CIiIjVRTUOMXwFTzOwXpNc5fQkYU9NaiYiIVFBN\nQ4wfmdnzwN9H0uXuPqm21RIREXmnaq60AJ4BdiJdaT1Tu+qIiIi0rJrWg2cATwHDgDOAJ81sWK0r\nJiIi0lw1DTG+DRzp7sPd/SzgKOC7bY1kZqPNbIGZvVhI+3cz+7OZPRufEwvDLjOzBjN72cyOL6QP\njrQGM7u0kH6gmT1pZrPM7Ldm1jXSd47+hhjev61piIhIOVQTtDq5+4JC/+Iqx7sVGFwh/Wp3Hxif\niQBmdghwJnBojHO9mXU2s87AdcAJpP/x+nzkBbgyyhoALAVGRPoIYKm7HwxcHflanEYV8yEiItuJ\naoLPg2Y2yczONrOzgfuBiW2N5O6PAkuqrMcQ4A53b3T314AG0hXdUUCDu8+OF/XeAQwxMwM+DYyL\n8ccAQwtlZa0bxwHHRv6WpiEiIiXRZtBy938BbgIOAz4C3Ozul/wV07zIzJ6P24e9Iq0vMKeQZ26k\ntZS+J7DM3Tc0S9+srBi+PPK3VNY7mNlIM6s3s/qFCxdu3VyKiMg2V82VFu5+p7t/3d3/r7vf9VdM\n7wbgIGAgMA/4caRbpcluRfrWlPXORPeb3b3O3ev69OlTKYuIiHSAqoLWtuLu8919o7s3Af9Nfntu\nLrB/IWs/4K1W0hcBPc2sS7P0zcqK4T1ItylbKktEREqiXYOWme1b6D0VyFoWTgDOjJZ/BwIDSM3s\npwIDoqVgV1JDignu7sBDpGb4AMOBewplDY/uYcAfI39L0xARkZJo8cfFZjbF3Y81syu35hmWmf0G\n+BSwl5nNBUYBnzKzgaTbcq8D5wG4+3QzGwvMADYAF7r7xijnImAS0BkY7e7TYxKXAHfEG+efAW6J\n9FuAX5pZA+kK68y2piEiIuVg6SKkwgCzGcAFwI3AP9LsmZC7P13z2m0H6urqvL6+vsXhC2+8aVN3\nn/PPa48qiYhs98xsmrvXbetyW3uN078Bl5Ke/VzVbJiTmpyLiIi0mxaDlruPA8aZ2Xfd/fJ2rJOI\niEhF1bzl/XIzOwX4ZCQ97O731bZaIiIi71TNC3P/A7iY1IBhBnBxpImIiLSrav6a5CRgYPy2CjMb\nQ2qtd1ktKyYiItJctb/T6lno7lGLioiIiLSlmiut/wCeMbOHSM3eP4muskREpANU0xDjN2b2MHAk\nKWhd4u5/qXXFREREmqvmSgt3n0d6DZKIiEiHadd3D4qIiPw1FLRERKQ0Wg1aZtbJzF5sLY+IiEh7\naTVoxW+znjOzA9qpPiIiIi2qpiHGvsB0M3sKeDtLdPdTalYrERGRCqoJWt+reS1ERESqUM3vtB4x\ns/cCA9z9D2bWnfSHjCIiIu2qmhfm/jMwDsj+7bAvcHctKyUiIlJJNU3eLwQ+AawAcPdZwN61rJSI\niEgl1QStRndfl/WYWRfSPxeLiIi0q2qC1iNm9i1gFzP7DPA74N7aVktEROSdqglalwILgReA84CJ\nwHdqWSkREZFKqmk92BR//Pgk6bbgy+6u24MiItLu2gxaZnYScCPwKumvSQ40s/Pc/YFaV05ERKSo\nmh8X/xg4xt0bAMzsIOB+QEFLRETaVTXPtBZkASvMBhbUqD4iIiItavFKy8xOi87pZjYRGEt6pnU6\nMLUd6iYiIrKZ1q60To5PN2A+8H+AT5FaEvZqq2AzG21mC4p/bWJm
vc1sspnNiu9ekW5mdo2ZNZjZ\n82Z2eGGc4ZF/lpkNL6QfYWYvxDjXmJlt7TRERKQcWgxa7n5OK58vVVH2rcDgZmmXAlPcfQAwJfoB\nTgAGxGckcAOkAASMAj4GHAWMyoJQ5BlZGG/w1kxDRETKo5p3Dx5oZleZ2Z1mNiH7tDWeuz8KLGmW\nPAQYE91jgKGF9Ns8eQLoaWb7AscDk919ibsvBSYDg2PYHu7+eDS/v61ZWVsyDRERKYlqWg/eDdxC\negtG0185vX3cfR6Au88zs+wdhn2BOYV8cyOttfS5FdK3ZhrzmlfSzEaSrsY44AD9/6WIyPaimqC1\n1t2vqXE9rEKab0X61kzjnYnuNwM3A9TV1emH1CIi24lqmrz/1MxGmdnRZnZ49tnK6c3PbsnFd9Z0\nfi6wfyFfP+CtNtL7VUjfmmmIiEhJVBO0Pgz8M3AF6YfGPwb+ayunNwHIWgAOB+4ppJ8VLfwGAcvj\nFt8k4Dgz6xUNMI4DJsWwlWY2KFoNntWsrC2ZhoiIlEQ1twdPBd5X/HuSapjZb0hN5Pcys7mkVoBX\nAGPNbATwJuk3X5Bewnsi0ACsBs4BcPclZnY5+e/Cvu/uWeOOC0gtFHchvZ0je0PHFk1DRETKo5qg\n9RzQky18C4a7f76FQcdWyOukP5usVM5oYHSF9HrgQxXSF2/pNEREpByqCVr7AC+Z2VSgMUt091Nq\nVisREZEKqglao2peCxERkSpU839aj7RHRURERNpSzf9prST/PVNXYCfgbXffo5YVExERaa6aK63d\ni/1mNpT0HkAREZF2Vc3vtDbj7ncDn65BXURERFpVze3B0wq9nYA62n5lkoiIyDZXTevBkwvdG4DX\nSW9MFxERaVfVPNPSmyNERGS70GLQMrN/a2U8d/fLa1AfERGRFrV2pfV2hbRdgRHAnoCCloiItKsW\ng5a7/zjrNrPdgYtJL5m9g/SmdxERkXbV6jMtM+sNfB34Aumv6w+Pv70XERFpd6090/pP4DTSP/h+\n2N1XtVutREREKmjtx8XfAPYDvgO8ZWYr4rPSzFa0T/VERERyrT3T2uK3ZYiIiNSSApOIiJSGgpaI\niJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJSGgpaIiJRGhwQtM3vd\nzF4ws2fNrD7SepvZZDObFd+9It3M7BozazCz583s8EI5wyP/LDMbXkg/IspviHGttWmIiEg5dOSV\n1jHuPtDd66L/UmCKuw8ApkQ/wAnAgPiMBG6ATX+bMgr4GHAUMKoQhG6IvNl4g9uYhoiIlMD2dHtw\nCOk/u4jvoYX02zx5AuhpZvsCxwOT3X1J/MfXZGBwDNvD3R93dwdua1ZWpWmIiEgJdFTQcuD3ZjbN\nzEZG2j7uPg8gvveO9L7AnMK4cyOttfS5FdJbm4aIiJRAq/9cXEOfcPe3zGxvYLKZvdRKXquQ5luR\nXrUIpCMBDjjggC0ZVUREaqhDrrTc/a34XgDcRXomNT9u7RHfCyL7XGD/wuj9gLfaSO9XIZ1WptG8\nfje7e5271/Xp02drZ1NERLaxdg9aZrarme2edQPHAS8CE4CsBeBw4J7ongCcFa0IBwHL49beJOA4\nM+sVDTCOAybFsJVmNihaDZ7VrKxK0xARkRLoiNuD+wB3RSv0LsCv3f1BM5sKjDWzEcCbwOmRfyJw\nItAArAbOAXD3JWZ2OTA18n3f3ZdE9wXArcAuwAPxAbiihWmIiEgJtHvQcvfZwEcqpC8Gjq2Q7sCF\nLZQ1GhhdIb0e+FC10xARkXLYnpq8i4iItEpBS0RESkNBS0RESkNBS0RESkNBS0RESkNBS0RESkNB\nS0RESkNBS0RESkNBS0RESkNBS0RESkNBS0RESqOj/k9LdiC33Hbcpu4RZ/2+A2siImWnoCVbZewv\nBm/qPuOcBzcbNubWPEgNP3vz
IHXTL4/f1H3eFyfxs1/l/Rf906RtXU0ReZfR7UHZrvzk18fzk18f\n33ZGEdkh6UpLqnJn4crqtGZXVrffmgeZL5ytqyURqR0FLdlu/eg3eTD8189P4ge/zfu/8w8KjiI7\nIgUtqWjC6BM2dZ/ypQdaySki0n70TEtEREpDV1pSSt8dmz9ju/yMB1vJKSLvJgpa8q7wL+PyIPaf\nwxTERN6tFLRkkwduOTHvsY6rh4hISxS0dmCTf54Hqc+cO7EDa7LtffnO/Mrr+tN05SXybqGgJe96\nX7g7D2C3D32QEyacsqn/gVMmdESVRGQrqfWgiIiUhq60djAP/fwkAI459/4Orsn244R7vpT3+G6b\nOh8Yek0H1EZEWqOgJdKKE+++ZFP3xKFXdmBNRAR20KBlZoOBnwKdgZ+7+xUdXCUpiRPvGgXAxFO/\nx4l3/XBT+sRTv82Jd/2o0P+v7V43kR3BDhe0zKwzcB3wGWAuMNXMJrj7jI6tWW08dvNnN3X/7cj7\nOrAmO7aT7sxvNd5/2lc56c7rC/1f7ogqiZTSDhe0gKOABnefDWBmdwBDgHdl0JKOcdKdV23qvv+0\nr7eed/xNed7PncdJ428p9I/gs+NvzTN73nbqvmFn8dlxtxf6v8Bnx91RKLlzYdjpW1B7ke3Xjhi0\n+gJzCv1zgY91UF22iadvPHlT93rzTd0fO09XVpI7edydm7rvHXYaJ4+7J7qHcMq4fFuZMOyzDBmX\n/7bNCsHv7mGfYej4h/L+zx3DqeP/p5A3P6Tc+bmj+dz4+k394z9Xt43mRHZk5u5t53oXMbPTgePd\n/dzo/yJwlLt/pZBnJDAyev8GeBnYC1hUKKrY39qwLcm7rcrZHvKqfpoX1W/Hnpf3unsftjV336E+\nwNHApEL/ZcBlVYxX31J/a8O2JO+2Kmd7yKv6aV5UP81LLT474o+LpwIDzOxAM+sKnAnotQgiIiWw\nwz3TcvcNZnYRMIn0pHq0u0/v4GqJiEgVdrigBeDuE4EtfUPsza30tzZsS/Juq3K2h7yqX/vlVf3e\n3fXbkrzbY/22qR2uIYaIiJTXjvhMS0RESmqHvD3YEjM7EniC9NutpaSg3hVYC+wErAE+TPqLxOeA\nVcB7It8B8d0ZcNKyXR3j7B7pxZOENUAj0A3YOcYh8m0EVgLzgb2BHjHN9cA6YJfIswDoF93Ed1eg\nKYbtAXSPsldHN1HGmig3G28D+a9RVwK7xTxTmHbnGI+oc+eYVjbMo5zuUabF8GzY21G/roXpWiyX\nbB6yOqyO753i46T1sHNhnKboXh3f3QrTXBdlrYrltUthml6oa5f4WKE+2V9grojleGBMb3nUZdco\n/xXggzEdA+qBI6LsN4E9YzmuIf14/bCY5mvAoTGNNcCySN8vluVKUrNhBxZHOZ0ib7ZOsmmubjYP\nGYty9yjMU7Ydd4qys/WabQsbC8t/faGcbFqZ4rYyP6aRLd+FsXy6k9bDWtJ66RRlZMs8Ww79o39j\n1KNblDOb9HOTbHqLo47ZtpBt65BvYxSGZftgNo/FfbO4jDbE+F1I+2O2b3Um366zMizyedQ1258a\nY777xnTWNcublQubb9c0695YyGvk62hDTL8rm6+fdaRlTSFvI/k+WOmvXIvrGNJxrgf5sentQplZ\nuQ7MJG2z2frL6rAM6Am8TlqXnUj73OXAJUDvKLOPu68zs97AbyPv68AZwPeAE2PZnO3uT1eo9ya6\n0grxeqcrgT8A33D3gcDZpFuoA4GvAYcDx5B20kOBh9z9g6TnY6ti/AXkB6KFpANfN2AsaQOZQVqJ\ns0kHtO8DF8b4DjxLWuHdgQ8BXyStzCWkg1p34CHSCu9N2ghXAD8gbaRXkg4UjZF+KvBSdL9N2ujO\niHIWAs8DDaQdYixwA2kjXhHdd8Ui+naMtxPwwyjrRtIPtbsC9wCDY143AreRDvIAXwH+l7Qzrg
A+\nABwZw9aQThIWxPy/FXXKgsibsR4aI+1a8reXfCuWYbYTXko60G2IcX9E+r1It0gbUSjzq7GclgIv\nkALTzJjGQuBvo777R9pTpJOPtaSDrQMHA7OAx6P8DwB3Rv8BwJ+Ae2P8D5GC3AzgENKB8okYtiS6\n58V8rI/l+SegV5T9YgzLDopHkZ9s/E+sCwPGkA4Ka6O+I0gnPhuj3P8G+kQ5PwPGkR8Hbo9luD6W\nwTDgySj3COD9Mb01pPU+hHQSsXMsux5R9grgu6RtuhPwa+DvYhqTSdveK6QD1/+SgvJa0nr8HfCR\nWI+NpP3jlZiXnwOPRfrbsbz3i3UxD/hNzOdy0nr/t0J9H4x5Wk/azq+NdbAqxltKfsIzjRQkdyJt\nixeQn7TOjHnYFfgjaTtaTzp5fJF0nNgplsFepO1ml5i/W6I7W2fzY/msJN8GupGOHati+MrIf20s\nv5mk48HiyPsKad9ZSB6AZ5O2/WWkbWUd8J+k/XxJLJ83opzsZPCBSO8WaS9FWfcA98eyXkvanq+L\ncd+ItHkx3kLStjaVdDz6Bfl2OiLqfykwxd0HAFNIz78GxGck6ZjTKgWt3FeA8eQHDkg7Y1N0Z2fG\nfyEttyWkMwxIZwnT4tvJr87eA3Tz9OBwT/Iz2udJK8mAMe5+I/m6WA5kZxp9SAHyadLZzftJG3G3\nKPt+8jPLg4FnSAeSBaQNdDopCDaRDj47k3aww0gbXG/SQfThqO/fkQLPQtJO/wOgLsb/oLuPB14l\nvbfxyZiXOTFfPUnB9y9R90WkA0sDcB7pbGoZMNfdX3H3aeQB5Q3yq5p9SD/mXk3acQ8kBc61UfaH\nYvluJB1oIO2UC6IOf476roj0A0hBsVOUsSHm4VzSum4kHTgXkA6MWXldyM+YV8eyey2W2aIYb03M\n41LyM+rrSet6HulA/1Pyq8gnSDv/61GfSVGnBuB40m8Gm6Ksa0kBeVHM7y9iWk3AOnevj3X5GjCI\ntN2sI63zw8mD1H3uvii63yRtT++P4X1j3Oz1FysifV0s638CHolx69x9VgzrRn4l2SXm5TnyA9Te\npMDQhXTw+xj5wfAxd8+uPC3W18YoY0V8boz1lG3bM0gBbR/gquh+xt1fIW0ffyEFiGWxvNbHsmuI\n/kWkQLiUtD+sJ21HK8mvZCFtB8TyyJZndpfEo/znSdtAU5S7X6wXyK8sPeZrBemkZyVp+3k0hi+O\nZTItxp0Z9X2LtK1lV+fTY9jOpG3mFdIxoUuM6zH/S0nbVlMs576kbf6uWK5dSEGyeGfn3ph2z5jG\nEja/Wt83vnuSAm1212ki6aR1l1huTn5H6D53XwKMJg/4S6J+Q6O8IaQTK+L7M8BtnjwB9DSzbNqV\n1fJHYGX5xEp+hLRx3ko6S3ns3emrAAALoElEQVQpFviQWPiNsSEuI23Q9wA/i/FXkHbibIPNzsY3\nxmck6SxsauS9iLRxbizUYUlsdA8DJ5MO2HXAN0kH2RWknWRNTHsRcA1px1oY4y8jv1WxLNJOiWn/\ngbSRzorha6Nu55IOiNktyeyKbGrUKztQXk86M15EupJ4Cfg9+W2dX5M21pVRpxtj2LKY1g1RzhLS\nVVb/SF8FHBt5i8uvnrQzeOSZkS0z4O6Yz3kx7PWo9yzSASk7YDdG2h+inOy2YTadR6Pu62I+Z0T9\n5hXyr456zC4st9kxzkOxnu+LaU4BPhX1WUc6cD9AfpvvBtIBdj75Fd+GqPsy0pVEdrX5BfKr8gbS\nNrexMA8z47sxxnko8s4lv7pYTTrhmRZ5syD3NPnt0YVRb4+6NZLf8s0OnE2Rnk1zY6QvI78V9ybp\nwJkt25eiO7sCeII8AD1TmPdHSdtUU/S/Hsu/GECzui+N8rL6vBzr9mXybWFt5F1O2h4fjro2kg7i\nv4tl8EaUu5K07z9EuuJqIj/YriK/as/mc3HUeU6hHtnyXl
/I3xR1fY18G/vHSM+WWxZAHyvMfzav\nWd2ybfGbMa/ZfvsM+d2J7AQq2xbeJt3puI389t5Vsf6yk7rPR75s/G9HvbJ6ZGWtL+Rz0tX3WYX+\nL8e6aiIF1UOBz0b9v0naz18CXozjybJmx951wN8W+qeQTpD04+I2/AS4xN2zM+Kn3P0DpLODr7j7\nYaTbFHsBHyUFuUOAfc1sN9Ltjq+RB5fOwJdIG9ga0hXIrqSzmF1JO041nHRAbyLt5BeQdqZjSBts\ndla0DLiCdNuiiXR2+ARpY7ku8ryXtNP3Jp3hPEG6/XQl6Yx6FWkD6k46qG2IeetGfiAbT7qFtG/U\nbU/SzrCEdJX5UdIODemq7fZCnc4mnY2/FPM/PsZbQjpY3Bd1+h/SwfYw8udz/WJar8X4B8e83xDD\n3gOsilsOfUg72V9IB5ZO5M+gepMfSNeRzpLfIgXf52IZrSId0L5DfsXREHl7RH2yK/BlsRz2LqwH\non816Wr00Vh+y0g7c3ZA2kDa+bOD/C6xXNfH8r+EdMbdJaZ1KemAOZv8SqBzzMtG0rPW54CrYx66\nAi+5++HACTGNFaQz/N6kbXVlLPvsWdsxsQxWkLaVxuh/IZZ19nx0IemqJHuWuDiW8aGxbCBtU9mz\n2uwWVyfgOXf/KPkVzL+Qru6z5zFdYvlnt2L7xTrqEvmnkD+v6xrj9CPtZ03Ar2KddSedsDVE/uzM\n/wORb1dSsJhHuhX8z+T70/tJt1GzAH1ufM8hBZpdY109GetoftTv3yM9u0V5HfnBfQ35idtusSye\niHyHxbSnFpbpK6T9szHq1Pz51PtIxwQnnZhdS9qXiDJPIW0TTfH5h1he2a3yVfFpIp1YjSJ/lpY9\nt86ehWdXRpnsRKwROB04h7TdPkE6oaQwfqXuokrP3Vpt0r7DBi0zu9DMnjWzZ0k766Nmto60Eq43\ns6Hu/ihwkJntRTowNgJHuHt2dr8r+cH3cdJZbna1ciRpJ1tBOsNbR351sZhoSFC4FM4eRmd2Aj4B\nHER+u3AueUOAg4Hh0Z8daA4gHVCyWwKryR8y709+G2886SA8iXQgfor89sAS0plvZ9IthOWkDes4\nUhDaj7TT7R79d0YdekQ9/i769yMd2N4bdc/uw68hHdQnkA6evaOsK2KcB6Iez5Ff7awiBek9Yxk8\nHvX7C3Ax+dUK7r6M/IosC2hHRp3qI192Nn4v6XbZHNLtoi7R3488YOxPOtBnjR26kW5T7Uy6Iu5L\nCtadSMHhXtLOvzsp0Hw78r4vys0+XUlnzt3JG70cE9/HR332i3zvIx1U9oz6Hezug0gH92zZzoy6\nZFeGGyMv5EFribsviOlmt5nOj3xdSNsDsTyyRgjTYhk+Sd4Aw0jPN39N2q6XRL0vJ7+Cy65od451\nl7115gUzGx7Ly939yVin2cH5PaRb58Syrie/2jDS1WvWyKaJdJXUPeb9CfIrxE6kOwzrSc8F18Q8\n9495eZq8AYfFMjmZvNFK9iwxa7iyKtKPJG9Ycq27ryc/+B9NutJeFdOtIwXKnUknvz1I+8AbpONE\nI2lfy65g58d31oikC/kt8FWk7WEN+TPW7PlYli87qchOFl4gf7wxPeZlD/LbmVnAuIn89h9RTtaI\npCvpblPmRtLJF6R9qX9hellDof2ivOxRwU6kEw+A+dkxL77fJu1jmX6FvBXtsEHL3a9z94Hx2cvd\nu7p7V9JZ7Jfd/e74s8iupA3tOdLBZbWZdSc9C+hF2uDGkp7bTCBvpbQv+TOT40gb7+6kjWtY9Dsw\n3MwGkTby7AwjazV1PnASKRB0Ip1R7UkKBrPIb0G+RQqMA8lvWwwgtWzrSr4xZxv57qRgcgbpgLMb\naWN5Hvglaec7NPLfTtooXyWd/Z1MOtvsQbqtOIt0Bfom6YCTPYv4h6jTEaQdc0HU6Wjy20vZVcSf\nY/rvJR0QXo26d4/lew
HpwN2D9PB7YCyPAaQrvE7A7mZ2iJkdTQrMfUi3h16OZbmWtPP1IQXNGcBp\npB33M6Qz97djWexCuvLbQH6L7ZGo9xJS44W1pJ32JtLVVHabdA55w4yTSWfpa8hPXv5IfjtpKGn7\nWEo6y74tyn2jsH6ddOIwKvJ2Bt40s4NJB4c+pFtbA0jbXF/SFUQXoJeZDSMFTgP2MLNPxPLblXQl\nMJH8Af65sSwPimX9Kmlb3JO8lWB2oFtEHgz3ijpnrVxfinnvHMvywcgD6SB3Ceng3GRmpxXmqzPp\nqjd7GL+MdMJwUPS/SdrvOpM/m8ka9Kwn7aMfJ63/9aT99uNR/wbSXYuupO3snlhm/Ujb80fJb7c9\nHutqr1guPQqfZbHcDWiMV8HtSzowLyJvjbd7rMODIu+0WPa9Y/w/kW4B707a1leTroyyZd076vcB\n0rbVP/q7x7I7OOreQDoOnRvLZS35FdcnyVsefpj8RGRV1Ll7LKMRMf3V5Ntjtt9MIp04ZM9vHyOd\ntK0hf77+yVh2p0X5p8d0s5agvWJ5QzpGDo/u4aSThbMsGQQsd/diu4J36ujnSdvbJ1ZQdhtgRmwU\nz5EO6C/ESl0XKzt7CPk2+XOUrEVX9uxhVeRdTn6rIGvxlz10z+4je7NPdtAspjUWutc3G9a8OXdj\ns/7iNNY2G5bVo7FQbhPvrNPGZv3FeVhX6N7YbPyNzcrNysmWZ1OzsleTP9doKgzfwDvrkLWKLJad\nPdNaFuUUhzWSPy9ZX5hG1oqrqdCfteRqiumsJn+W1FCoezav2fOPlYVlspa0nfw5ypgcebOfHswm\nHSjWkD/nWxDl/DHGycopTq+J/HlJll5cp8XlVFwfxW1hHemW7AbeuZyz+czKyZ7xvUG+zS5m821g\nbmFaxeeLzdfvWvIgkTVgyLbZt0knZsVhbxSWbfG5W3ZbKtuW5pGCbbYNZc9psluyKwvLI3v+tzrW\n5Q/Jb2c2FqaTTas4n2sK87Km2bxkgSJ73rWqUE52TMie4Tbfx9Y16y/uZ83zVto/W0uv5lNp3CbS\ncTGbn+bHg+x5cjbvy4B/JW0bWf4FpOC4J+mkZgHpVm9v0snTq6Tja6vPs9xdb8QQEZHy2GFvD4qI\nSPkoaImISGkoaImISGkoaImISGkoaImISGkoaImUgJn1N7MX28jzKTO7r73qJNIRFLRERKQ0FLRE\nyqOLmY0xs+fNbJyZdTezwWb2kpk9Rnojgci7moKWSHn8DXBzvMB5BfB10otdTya96us9HVg3kXah\noCVSHnPc/U/R/SvSC1lfc/dZnl5t86uOq5pI+1DQEimP5u9c61EhTeRdTUFLpDwOiLfYQ/oTvz8A\nB5rZQYU0kXc1BS2R8phJ+iub7C/fryb9K/b90RDjjdZGFnk30FveRUSkNHSlJSIipaGgJSIipaGg\nJSIipaGgJSIipaGgJSIipaGgJSIipaGgJSIipaGgJSIipfH/AdfcA9drlmLWAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x25be7873fd0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(df_train_merged['bd']);\n",
    "plt.xlabel('bd');\n",
    "plt.ylabel('Number of occurrences');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    7.377418e+06\n",
       "mean     1.753927e+01\n",
       "std      2.155447e+01\n",
       "min     -4.300000e+01\n",
       "25%      0.000000e+00\n",
       "50%      2.100000e+01\n",
       "75%      2.900000e+01\n",
       "max      1.051000e+03\n",
       "Name: bd, dtype: float64"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train_merged['bd'].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "发现有异常值且占据分量较多。年龄不可能小于0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#把小于3的值去除\n",
    "ulimit=3 \n",
    "df_train_merged = df_train_merged[df_train_merged['bd'] > ulimit]\n",
    "ulimit1=90\n",
    "df_train_merged = df_train_merged[df_train_merged['bd'] < ulimit1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    3.918706e+06\n",
       "mean     2.859174e+01\n",
       "std      8.555085e+00\n",
       "min      5.000000e+00\n",
       "25%      2.300000e+01\n",
       "50%      2.700000e+01\n",
       "75%      3.200000e+01\n",
       "max      8.900000e+01\n",
       "Name: bd, dtype: float64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train_merged['bd'].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看出，至少百分之50的人年龄在23到33之间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaAAAAEKCAYAAABUsYHRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XmcHVWZ//HPE8ImCAQJW0gmgAgy\nqIiIKL9RhBHCvgWVYSADOHEB1JFxBsYFFVEch0HRgZmMRAKyaSCAEIgxIIwgYSfsEJKQNFlJQlay\n9vP743kqt7rpdG46ualO9/f9et1X3zpd59SpqlP11HJulbk7IiIiG1qPqisgIiLdkwKQiIhUQgFI\nREQqoQAkIiKVUAASEZFKKACJiEglFIBERKQSCkAiIlIJBSAREalEz6or0FnssMMO3r9//6qrISKy\nUXniiSfedPfeHcmrAJT69+/P448/XnU1REQ2Kmb2ekfz6hKciIhUQgFIREQqoQAkIiKVUAASEZFK\nKACJiEglFIBERKQSCkAiIlIJBSAREamEApCIiFRCT0KQdTZ02BEths8e9IeKaiIiGxOdAYmISCV0\nBiRr7fprj2yZYNXUQ0Q2bgpA0sKIXw9Y9f2ks+7tcDn/c33LIPXFM0Z1uCwR6Zp0CU5ERCqhACQi\nIpVQABIRkUooAImISCXUCUE2mF/cUOuYcP7p6pQg0t3pDEhERCqhACQiIpVQABIRkUooAImISCUU\ngEREpBIKQCIiUgkFIBERqYQCkIiIVEIBSEREKqEAJCIilVAAEhGRSigAiYhIJRSARESkEgpAIiJS\nCQUgERGphN4HJJW5/KYjWwxfcJreESTSnSgASbtu+fWAFsOfO+veimoiIl1Nwy7BmVlfM7vfzF40\ns+fN7GuZvr2ZjTazV/Nvr0w3M7vSzMab2TgzO6BU1qAc/1UzG1RK/4iZPZt5rjQza28aIiLSeTTy\nHtAK4AJ3fz9wMHCume0LXAiMcfe9gDE5DHAUsFd+BgNXQwQT4GLgY8BBwMWlgHJ1jlvkKw7XVzcN\nERHpJBoWgNx9mrs/md8XAC8CfYATgGE52jDgxPx+AnCdh0eA7cxsF+BIYLS7z3H3ucBoYED+bxt3\n/4u7O3Bdq7LamoaIiHQSG6QXnJn1Bz4MjAV2cvdpEEEK2DFH6wNMKWVryrT20pvaSKedaYiISCfR\n8E4IZrY1cCvwdXefn7dp2hy1jTTvQPra1G0wcQmPfv36rU3WLuGuoUe1GD727HsqqomIdEcNPQMy\ns02J4HODu9+WyTPy8hn5d2amNwF9S9l3A6auIX23NtLbm0YL7j7E3Q909wN79+7dsZkUEZEOaWQv\nOAOuAV509/8s/etOoOjJNgi4o5R+ZvaGOxiYl5fPRgFHmFmv7HxwBDAq/7fAzA7OaZ3Zqqy2piEi\nIp1EIy/BHQKcATxrZk9n2r8BlwG/NbNzgMnAqfm/kcDRwHhgMXAWgLvPMbNLgMdyvB+4+5z8/mXg\nWmBL4J780M40RESkk2hYAHL3P9P2fRqAw9sY34FzV1PWUGBoG+mPA/u1kT67rWmIiEjnoWfBiYhI\nJRSARESkEgpAIiJSCQUgERGphAKQiIhUQgFIREQqscZu2Ga2FfC2uzeb2fuAfYB73H15w2sn3c6l\nt7R8Sd23PqeX1Il0VfWcAT0IbGFmfYhXG5xF/PhTRESkw+oJQObui4GTgV+4+0nAvo2tloiIdHV1\nBSAz+zhwOnB3pulV3iIisk7qCUBfBy4CRrj782a2B3B/Y6slIiJd3RrPZNz9AeCB7IyAu08Avtro\niomISNe2xjMgM/u4mb1AvFIbM/uQmV3V8JqJiEiXVs8luJ8BRwKzAdz9GeCTjayUiIh0fXV1JnD3\nKa1epb2yMdUReadv/27Aqu8/PPXeCm
siIutTPQFoipl9AnAz24y4//NiY6slIiJdXT2X4L5EvCiu\nD9AE7M9qXhwnIiJSr3p6wb1J/AZIRERkvamnF9wwM9uuNNzLzN7xemwREZG1Uc8luA+6+1vFgLvP\nBT7cuCqJiEh3UE8A6mFmvYoBM9sePYpHRETWUT2B5HLgYTMbnsOnApc2rkrSCKOuObrF8JHnjKyo\nJiIioZ5OCNeZ2RPApwEDTnb3FxpeMxER6dLqvZT2EjC3GN/M+rn75IbVSkREurx63oh6PnAxMIN4\nAoIBDnywsVUTEZGurJ4zoK8Be7v77EZXRkREuo96esFNAeY1uiIiItK91HMGNAH4k5ndDSwtEt39\nPxtWKxER6fLqCUCT87NZfkRERNZZPd2wvw9gZlu5+6LGV0lERLoDvRFVREQqoTeiiohIJeoJQLj7\nlFZJeiOqiIisE70RVUREKqE3ooqISCXaDUBmtglwhruf7u47ufuO7v739TwVwcyGmtlMM3uulPY9\nM3vDzJ7Oz9Gl/11kZuPN7GUzO7KUPiDTxpvZhaX03c1srJm9ama35NkZZrZ5Do/P//dfqyUiIiIb\nRLsByN1XAid0sOxrgQFtpF/h7vvnZySAme0LfB7468xzlZltkgHwv4CjgH2B03JcgJ9kWXsRD0o9\nJ9PPAea6+3uBK3I8ERHpZOq5BPeQmf3SzP7GzA4oPmvK5O4PAnPqrMcJwM3uvtTdJwLjgYPyM97d\nJ7j7MuBm4AQzM+AwoHhH0TDgxFJZw/L7cODwHF9ERDqRejohfCL//qCU5kQA6IjzzOxM4HHggnzF\ndx/gkdI4TZkG8Sy6cvrHgPcAb7n7ijbG71PkcfcVZjYvx3+zg/UVEZEGWNM9oB7A1e7+6Vafjgaf\nq4E9iY4M04i3rUK84qE170B6e2W9g5kNNrPHzezxWbNmtVdvERFZz9Z0D6gZOG99TczdZ7j7yiz3\nf4lLbBBnMH1Lo+4GTG0n/U1gOzPr2Sq9RVn5/21ZzaVAdx/i7ge6+4G9e/de19kTEZG1UM89oNFm\n9s9m1tfMti8+HZmYme1SGjwJKHrI3Ql8Pnuw7Q7sBTwKPAbslT3eNiM6Ktzp7g7cDwzM/IOAO0pl\nDcrvA4H7cnwREelE6rkHdHb+Lf/2x4E92stkZjcBhwI7mFkT8VbVQ81s/8w/CfgigLs/b2a/BV4A\nVgDnZg88zOw8YBSwCTDU3Z/PSfwrcLOZ/RB4Crgm068Brjez8cSZz+frmEcREdnA6nka9u4dKdjd\nT2sj+Zo20orxLwUubSN9JDCyjfQJ1C7hldOXAKeuVWVFRGSDW2MAyh5r7+Du163/6oiISHdRzyW4\nj5a+bwEcDjwJKACJiEiH1XMJ7vzysJltC1zfsBqJiEi3UM8ZUGuLiV5qIpX4p1tbPuHpilPuragm\nIrIu6rkH9HtqP+TsQTyT7beNrJSIiHR99ZwB/Ufp+wrgdXdvalB9RESkm6gnAE0GpmX3ZsxsSzPr\n7+6TGlozERHp0up5EsLvgObS8MpMExER6bB6AlDPfBUCAPl9s8ZVSUREuoN6AtAsMzu+GDCzE9Cr\nDUREZB3Vcw/oS8ANZvbLHG4C2nw6gkhVzhrRsmv2r09S12yRzq6eH6K+BhxsZlsD5u4LGl8tERHp\n6tZ4Cc7MfmRm27n7QndfYGa98gnUIiIiHVbPPaCj3P2tYiBfoX1046okIiLdQT0BaBMz27wYMLMt\ngc3bGV9ERGSN6umE8BtgjJn9mngkz9nAsIbWSkREurx6OiH8u5mNA/42ky5x91GNrZasq/t/dcyq\n75/+wt0V1kREpG31Pg37KWBT4gzoqcZVR0REuot6esF9FngUGAh8FhhrZgMbXTEREena6jkD+hbw\nUXefCWBmvYE/AsMbWTEREena6ukF16MIPml2nflERERWq54zoHvNbBRwUw5/DhjZuCqJiEh3UE8v\nuG
+a2cnA/wMMGOLuIxpeMxER6dLq6gXn7rcBtzW4LiIi0o3oXo6IiFRCAUhERCqx2gBkZmPy7082\nXHVERKS7aO8e0C5m9ingeDO7meiAsIq7P9nQmomISJfWXgD6LnAhsBvwn63+58BhjaqUrJ2Hhhzb\nYviQwXdVVBMRkfqtNgC5+3BguJl9x90v2YB1ElkvjrqzFpjvOV5BWaSzqed3QJeY2fHAJzPpT+6u\nrVlERNZJPQ8j/THwNeCF/Hwt00RERDqsnh+iHgPs7+7NAGY2jHglw0WNrJiIiHRt9f4OaLvS920b\nUREREele6jkD+jHwlJndT3TF/iQ6+xERkXVUTyeEm8zsT8BHiQD0r+4+vdEVExGRrq2uS3DuPs3d\n73T3O+oNPmY21MxmmtlzpbTtzWy0mb2af3tlupnZlWY23szGmdkBpTyDcvxXzWxQKf0jZvZs5rnS\nzKy9aYiISOfSyGfBXQsMaJV2ITDG3fcCxuQwwFHAXvkZDFwNEUyAi4GPAQcBF5cCytU5bpFvwBqm\nISIinUjDApC7PwjMaZV8AjAsvw8DTiylX+fhEWA7M9sFOBIY7e5z3H0uMBoYkP/bxt3/4u4OXNeq\nrLamISIinUi794DMrAcwzt33W0/T28ndp0Fc1jOzHTO9DzClNF5TprWX3tRGenvTeAczG0ycRdGv\nX7+OzpNsJI664x9bDN9zwv9WVBMRgTWcAeVvf54xs0bvna2NNO9A+lpx9yHufqC7H9i7d++1zS4i\nIuugnm7YuwDPm9mjwKIi0d2P78D0ZpjZLnlmsgswM9ObgL6l8XYDpmb6oa3S/5Tpu7UxfnvTEBGR\nTqSeAPT99Ti9O4FBwGX5945S+nn52oePAfMygIwCflTqeHAEcJG7zzGzBWZ2MDAWOBP4xRqmIfIO\nR99+QYvhkSdeXlFNRLqfen4H9ICZ/RWwl7v/0czeBWyypnxmdhNx9rKDmTURvdkuA35rZucAk4FT\nc/SRwNHAeGAxcFZOe46ZXQI8luP9wN2Ljg1fJnrabQnckx/amYaIiHQiawxAZvaPxI367YE9iZv9\n/w0c3l4+dz9tNf96R77syXbuasoZCgxtI/1x4B2dI9x99prqJiIi1aunG/a5wCHAfAB3fxVYbc8y\nERGRetQTgJa6+7JiwMx60oEeZyIiImX1BKAHzOzfgC3N7DPA74DfN7ZaIiLS1dUTgC4EZgHPAl8k\nOgx8u5GVEhGRrq+eXnDN+RK6scSlt5ez04CIiEiH1dML7hii19trxBMIdjezL7r7Pe3nFBERWb16\nfoh6OfBpdx8PYGZ7AndT+92NiIjIWqvnHtDMIvikCejxNiIiso5WewZkZifn1+fNbCTwW+Ie0KnU\nnkwgIiLSIe1dgjuu9H0G8Kn8PgvQW0ZFRGSdrDYAuftZG7IiIiLSvdTTC2534Hygf3n8Dr6OQURE\nBKivF9ztwDXE0w+aG1sdERHpLuoJQEvc/cqG10Tq8tj/HNdi+KNf1FORRGTjVE8A+rmZXQz8AVha\nJLr7kw2rlYiIdHn1BKAPAGcAh1G7BOc5LCIi0iH1BKCTgD3Kr2QQERFZV/U8CeEZYLtGV0RERLqX\nes6AdgJeMrPHaHkPSN2wRUSkw+oJQBc3vBYincjRt39n1feRJ15SYU1EurZ63gf0wIaoiIiIdC/1\nPAlhAdHrDWAzYFNgkbtv08iKiYhI11bPGdC7y8NmdiJwUMNqJNLJHD3i0hbDI0/6VkU1Eela6ukF\n14K7345+AyQiIuuonktwJ5cGewAHUrskJyIi0iH19IIrP3xsBTAJOKEhtRHZSBwz4qcthu8+6ZsV\n1URk41XPPSC9F0hERNa79l7J/d128rm76wcSIiLSYe2dAS1qI20r4BzgPYACkIiIdFh7r+S+vPhu\nZu8GvgacBdwMXL66fCIiIvVo9x6QmW0PfAM4HRgGHODuczdExURE
pGtr7x7QT4GTgSHAB9x94Qar\nlYiIdHnt/RD1AmBX4NvAVDObn58FZjZ/w1RPRES6qvbuAa31UxJERETqpSAjIiKVqCQAmdkkM3vW\nzJ42s8czbXszG21mr+bfXpluZnalmY03s3FmdkCpnEE5/qtmNqiU/pEsf3zmtQ0/lyIi0p4qz4A+\n7e77u/uBOXwhMMbd9wLG5DDAUcBe+RkMXA2reuhdDHyMeDr3xUXQynEGl/INaPzsiIjI2uhMl+BO\nILp6k39PLKVf5+ERYDsz2wU4Ehjt7nOya/hoYED+bxt3/4u7O3BdqSwREekkqgpADvzBzJ4ws8GZ\ntpO7TwPIvztmeh9gSilvU6a1l97URrqIiHQi9TwNuxEOcfepZrYjMNrMXmpn3Lbu33gH0t9ZcAS/\nwQD9+vVrv8YiIrJeVXIG5O5T8+9MYARxD2dGXj4j/87M0ZuAvqXsuwFT15C+WxvpbdVjiLsf6O4H\n9u7de11nS0RE1sIGPwMys62AHu6+IL8fAfwAuBMYBFyWf+/ILHcC55nZzUSHg3nuPs3MRgE/KnU8\nOAK4yN3n5I9lDwbGAmcCv9hQ8ydSOOa2q1oM333yVyqqiUjnVMUluJ2AEdkzuidwo7vfa2aPAb81\ns3OAycCpOf5I4GhgPLCYeCAqGWguAR7L8X7g7nPy+5eBa4EtgXvyIyIincgGD0DuPgH4UBvps4HD\n20h34NzVlDUUGNpG+uPAfutcWZG1cMxtP1/1/e6Tv1ZhTUQ2DlV1QhDplo65dUiL4btPGbyaMUW6\nvs70OyAREelGFIBERKQSCkAiIlIJBSAREamEOiF0cuOuPn7V9w9++c4KayIisn7pDEhERCqhACQi\nIpVQABIRkUooAImISCUUgEREpBIKQCIiUgkFIBERqYR+ByTSyRx763Uthu865UyOHX5Dy7SBp2/I\nKok0hAKQyEbo2OG3tBi+a+DnKqqJSMcpAIlU7Nhba6+0uuuUsztezvDhLYbvGjiQ44bfvmr49wNP\n7HDZIo2ge0AiIlIJBSAREamEApCIiFRCAUhERCqhACQiIpVQABIRkUqoG7aIvMOJw8es+n77wMMr\nrIl0ZQpAIt3I8cPvajF858BjOWH4qBZpdww8ckNWSboxBSARWaOTbn2wxfCIUz5ZUU2kK9E9IBER\nqYQCkIiIVEIBSEREKqF7QCKy3gy89clV34efcgCn3vp8i///7pS/3tBVkk5MAUhEOuTkWx9pMXzb\nKQdXVBPZWCkAicgG9bnbJrQYvuXkPSqqiVRNAagTeeWXJ7QYft95d1RUExGRxlMnBBERqYQCkIiI\nVEKX4Coy+cqBLYb7fXX4asYU6X6+P2Jqi+GLT9qVn42Y3iLt6yftzK9um7lq+Asn78hNt85qMc5p\np/RuXCVlnXXZAGRmA4CfA5sAv3L3yyqukoisxldHTFn1/cqT+lZYE9mQuuQlODPbBPgv4ChgX+A0\nM9u32lqJiEhZVz0DOggY7+4TAMzsZuAE4IVKayUiG9ztw99sMXziwB2455Za2lGf24H7bmx56e6w\nv+vNQ9e1TNtspbcY/uhZOzJuyMwWaR8cvCMvXzVj1fDeX9lpnere2owrxrUY3umfPrhey9/QumoA\n6gNMKQ03AR9b20Jm/ffQFsO9v3T2utVKRGQ1pv904qrvO39zd6Zf/lKL/+98wT4dLnvGlQ+0GN7p\nq5/qcFnrk7n7msfayJjZqcCR7v6FHD4DOMjdz2813mBgcA7uDbwM7AC0PGSqL62R+Tpjnbp6vs5Y\np66erzPWqavnWx9l/5W7d6y3h7t3uQ/wcWBUafgi4KI68z7ekbRG5uuMderq+Tpjnbp6vs5Yp66e\nb32W3ZFPl+yEADwG7GVmu5vZZsDngTsrrpOIiJR0yXtA7r7CzM4DRhHdsIe6+/NryCYiIhtQlwxA\nAO4+EhjZgaxDOpjWyHydsU5d
PV9nrFNXz9cZ69TV863Pstdal+yEICIinV9XvQckIiKd3froybAx\nfoBJwLPA08DjRDfsp4HZwHJgJfAgMB9YASwGXgUmA28Dnp8Ds7xnM08zsBB4GFiSw0uBV4CbgLey\nPAeeAhZlvqX5ebZVvruBCZmnOeu2GFhQGqcJOD/Las7PfcD9wLQcXgG8lPVvzuk/CPTNtOWl8l4G\n5pXyTQAeyvGKuv8MmFia5xX5/6n5fUVOexGwLMdZCUwnOoksLc3PjJyHcj2fLY2zlOguPynHd2Au\n8Vuvt0v5RgOzStNbmnV8JssvluuC0vJckcvu0VxvRdr/lMpuzu9zgZmlcV4B/j7rVczLWcCepTrM\nB67OefRcHoOAG6m1s3nE/crJpbIHZ7v6h9IyHwQ8kd9XAq8B++d6LpbvVcD/Aa+X6v4E8O+lcV5p\ntZxWZr0nl5b5SuCUrH/RXt7IeZ3fqux5rcq6NMebWFpfb+V4nuO9nstkbqmsu3P5LC2lrcjpFsNN\nwD+2qsMw4I7M57meh2VZnnW4Criy1Tr9eWmZN+f3fXL5Fct8KHBZaf6agd8RbbgYZyaxzRTtx4E5\nmbaklLaQ2jZTjDOL2rbnwD3ENrKslPZWrutyWc9m/mJ4Wo6zNMtfQmyLRdnLge8Cj5TKXg4MLy2n\nlblemkr5moG/AIflOl2ay/4soj2+mp9B2V7fkdbufrjqQFBxANqhjfRPAgdm47o/h+/OlXZ0Nqq7\ngH/NFVYEoG/ninuR2DkvyAbwMrAZsRHdl2mLckVPAEbktLYHehEBcAmxE+6V9TyW2Nm+DFxRGqcp\ny55ABKWZwB7AeVnmRdl4fwyMJ3YAC4HDgT/nPJ0J/HOW0S/HX5jT/QC1ndWoTHsoG2dR99mZb2JO\nfzqxAb+W83xSfv8AsbOZTQSEqZn2BrWdzulEIJie05oB7JjlziUerVRsIE/m9ynAVsTBwyLg+1m3\nHTP/PGD3zDM7l9MiYmdT5GsCfkBs0Ftn2rScp+2B3tR27suAQ4iDlreyvLnEhjw+l9UrOY2RwA35\n/7mZdl/W7/ZMG0n00Jydn+E577Oz3m/lfK7IeV2c625MLtdvZ9rNmTaJ+I1GUdb0rNPKXIejiN9v\nNOey354ITrOI9T4d2Bz4CbWDpc/kNJvyMz2XSa8cZ34uqz2Af8ry38hl/GDWf1mO953MNyGnuRjY\nmVpbn1wq64qs99vEDq9o68tz2e0BfCn//5ec552oBYw5xA73/xE77EdzmtsTwWhZ1nUPoi1MzWm/\nWar3C7k+F2a+HfP7POBPwG5Z7xHUtr+fE23v4SzzvdTW/3zgp7kMZwJ/zHnaIf8/j9guirQF+ZkI\n/A2xnSwD/iWX3a45TyuIbXMa0bauJYLKpKzT5KxDuV1PpHbA/A85D29mnccR7WBa1ukNov0V7eXN\nLLvYb00g2uuEVmm92tsP6xJcK+7+IPB+YuUNyeF9cvg0IrjsBdwCGPCezPoa8ACxwTxAbJhvAcvd\nfRmxAewIbEqsVCd2Tu8Hlrn7HHefSzSk8cCKHB4FHEBtwxuYac1Ac5Y9lmgsT3k8fugOajsOiI3t\neSKo9XT3Mfn/JuBTROMa7e6TiSC3GJjn7s8SAXUS8XSJLYCvZD0eJjasVzLfuCzPiCDwHHGG94Gc\nn+cy/31E45yV5U8jNoyexAbQM+evX87Hwvz/MuCjOQ8rid6NU3O+iqPqWcChxJnQQuIS8xTggvz+\nRi6PxcQGWeRrAr4IXEPtqG8h8LS7z8l5MmJHuTin3SPz9ci0q4gdyrJcLjdnOb/MeVpO7EiKZXcw\nESCaeecZDMQR5HdzmV2QaW9mPf5I7HTHA1/L+R6SaS8DFxIHKL8mdgavZv47cvqvF+Xk/P0+874L\nuMndlxLtfAtih/cU0V7GAtsQv7Gble1zbI43MdveFlmvdxNt8nxqZyELgbcz32hgO+D/3H16qa
2/\nm9gxTiTaenHGsjjb+hO5PB/N6T1ABKa/Bka6+4xcjz1z3S/Jz63EcyFvy3l+JNfdG+4+wd0XEW2p\nD7WzYs9l0x94NvMtyfW/BPihuzcRbfrQXE4TgZOB64ngNIsIADOJ9rpFzsNrwK+Ix4aNJrbxyTne\n4FLa7CxjPnFm+QIRqL6a092P2D6XENvMCznv84n2tDDLLQ6It8llNj/HK64ILCb2TU8D2xJBbEti\n/zY56z2y1F4gHnc2p7Q+v0HsR8ppA2hHdw5ADvzBzJ7IJyKUHUesvOJxPr2JDag/tUBSlFF870Ps\nTAHOzrxLgR3NbAqxgawgjsqW5HhTyQ3VzMaZ2VBiJ7AVsIeZPZDT2DPLfhdxZPVm/t05yz6SaGiW\n5Q4mNsotiZ3A5sCHiYZU9Hzcgtg5rci6TzGz/kTAmAgsMbNfAscAu+R4bxJHQ5sRG9G2wD5m9iJx\nVjUvp3ks0fD2IzawKcCpRHv7A7HB7GlmU4lLSA9k3WcTjXarHD6O2CDHZJ3fS5yd9s08C4FPlMbZ\nmtiQzsj/LSaC2jm5nu4hNjiIHysvINbzi0RQ/EKmbUeelZnZ07nc5hAbaHGA8V5qBxQPZ7m7EBvs\nMuIIFyJIbUmcsRU+TKzPmTmfZ+Q6WkYEpa1zOZ1MnGkVvzjvl8vobOLMvE9O7xXiMt9BxI5lIBFs\nDsnx9yB2TD/O9dSXaFdHm9kTwPeIdWrAiZl2HNFWPOvykZxWT+Azpe3mr3LeDzazZcTZ93U5P9sQ\nl7C2zvrvAFxqZk25HjcBPmFmi8zslZxWT+CD1A40ioON681sDnBEpu1nZk9ROxDcGjjczH5F7aBw\n16z3t3McBz6a+f6D2A53NbPpZrYwp7s8635tllPkO8DM3iYOtJYT7eV6M5tJbSe7O3EAsEXWfxKx\n7d5GnJn9OeflCOJy/IvENj2F+K3iCuLqQH/ilsBtuV6ezjLfQwTXFTn9dwG/IdrrM0T7+3R+Tsny\n30+0tV2yvBlEe1tGtN3xWbcbifZS7GP+hWj304kzx81zuUO0r81zORSast6tH4HWh3Z05wB0iLsf\nQFzWOdfMPgmQP1z9W2Il16PoRljs/HtTux8CMNPd+xIrcR8igJXNJHYw+xMNZQuiYUwAvgl8rjTu\ntkTDNaLRTc+ybyB2AvvmxnUOtWvBEEd/Xy+GzWxrIiDeQzREIxr6Q8QGNjHTPg58ljjK3Ydo5LdS\nC6LTiCP8xcC9xJmhAX9HnC3eS5xh9SSeTn4TtSO1xzL/z4id6WvEZZSvEDsqMs9uuWw2yWU9gmjY\nBxCB6pEc53TiTGkW8KMso2eWvZi4bHAQtbOim4mNcrdS+khiI1+U89Ls7vsTlxRXEgcL04n28URO\nawlxZnZ1LsPimnzhcGrX+yGNr4PiAAAInUlEQVTaRxEcIQJ+cS/rQWLjXki0y82JnT+5XIuzkDOJ\nnVmvTJ9JnMFNzrotJQ5uDs+yluV0LyQC9ZNZ90WZf79cViuJS2RHAefmdEfndjIux3uOOEs7ijjT\n3YK4hHhjLssexM50CfDbHG+bzD+OCFizifW5IsvbPad1OtEmbyfawmY5n4ty/qYQAXkTYhtppnZ2\n2pz1WkQcSBixI30il+2niCD1KBHkVxLtZ4a770zs2IuzwIfd/cO53g7Jelyb4y8k2sFy4l7QTcSB\n13bEDn9M/j2WaOdjc1l8M9cNwPGZt7AJsZ1PIbYxiLPi4ix2byKY7lzKZ1nXHsSl9oOI7eIW4iDu\nbuB9xFnZHcS2sStx0PkT4qBoCRE0Hsrh/yPW4a7EpdSHiW10T6JtHm1mjxIHafV2n253vG4bgNx9\nav6dSezUDsp/HUVsFMuII0WIjXVTavdFikfgWv4PYuUfQjTO06mdeRTeIhrysUTD2I04Ip0JbOru\nzcD/Zplzs26PEutoGtEotiUaWE9io56fZU8lNo4XsrxhRF
BrynHvdPfbMs8KIojMIHYEU7P8c4mN\n6jBip7IPcEPmK+r+eWJnsX3WfRpx7fgG4j7SdjnPv8t8lmUel2V+O+d7l1yONxAb5k4xu34YcWRa\nHHX2c/e3iEsjPYkg1ZfY2TyU9WgmLkMVnSbGA33zUsEo4EM57fOJQLYFcSmmR677e4kdxQKgKac3\nnNggtzSz9xDBrliWu+UlzD8Rwaq4TNEr52OL/Hw1l+V3iB3MCUQQ+ECW96FcZrsSG3ivHOdGYkfz\nkVzm3yN2aj2IM64jieB5aOaBaFM353xtl/kvJAJjf+IodEfg4qzT3kR7HpXBpQhyi4F9c7mMpuXO\nYzm1S4x9ie1kBdGOnstlPjPr8eEs/7PEDr9nznMPd59ObG/FmeiszHdjjlfcizwqy+qV83Rt5ivq\n9JS7f4Q4SPGs+9657t5H7aY6ufz6EMFp71xe36B22ZPMt0l+P8zMJhE7+IOybHIb/XmO8zZxRlGc\nsTYT29fJxEHVPkQ770UE/F8R7aMZeC4vFb4/yx5I7fJ6cU/p45lvCBG0t8z6jCXa/Wb5eZLa9rIr\nsU+4LfNsmuXtQNzj/Btim39PXmYtLmcudfeiQ0JxprMr0X6GEGfQjxDt5SAiGL2d5ReKevdtldby\nzYKtVd0ZoIoPsbN+d+n7w8CAHL6Z2DFMIo7GjGj4y4nLUS8TR9Mn0bITwkBi43wlG91EYgdd7oQw\nmjhKfTnLf464nLIs8/wbtV4mL1DrDNGb2PgXZ1nPZH2m5PBrWeYC4pLHb3I6I7O872f5c3JaQ4nL\nSK8QZ0J/ynl5b45X3PjcvVT3qdRu6Bd1H5NpRd2XEw3z3zOtuGm+iNhwirovII4kdyd2UotzGvsQ\nR7wLc35mEhvx9FwmexI7qCbiSLgpl/HYHJ5BXH6YWMq3iNiAJhFH1W9nnueIgD49p3d9LrNtiSPY\n4l7JN4gj+SXE5aWVxIHGX3Le78vpnURs3NNyuUyn1glhdo4zozT+j7Iu91LrJDGR2DlOzLL3KKUV\n9+xeIu7tFcv+maz3Z4id0RJqD4ocQwTh17PeX8zhWcT6n5jTnke0jVuy/r2odQ6ZlMMPE23pSzlv\nL+f0ZhDBcAIRACfm8jy4lLaEaAtTiUAwNtfnc8TBVp+cj5VEwJxGnCGPpdaTblAu86lEe5mSy2dc\nLps7cx4uIw5OiuX1CLHzn0lsJ8U9ustynOm5DC4h2mXRu7LoADKLOABqyuUwhggic4l2flnOy9Kc\nxtgsa2bO/9vEQdaRWe/lRLvYjFpbWUl0air2G4flMjyXuBS2KD/fzHzLqHWiODeXjef8TSPOyopg\nvYTYrq7POk0prdOiE8IbOVxcoXgr064nDlbfzDImEkH0gVxur2e+oqyivZbTtm93X1x1MKgoAO2R\nDf6ZbFzfyvTzs4FMp3bTtOgqW3RbLC4/eenzHLVu0UXa/GwgxfBU4tLCnFLaG9koi+GlxEZdzndP\nNoyii2nRG2p+aZw5xBFkMdycjcZzXoq0GbSstxM7tKLcYjyn1qXbS/kmlur2R2o3l4ujTSc21HI3\nU6fW7Xllzo+XymkmdoCzS8OP53pZWir7e63KXtlq2TXncllWKrtYVuOo9YBrzvVX1Hs5cXlyXCnf\ncuCHtOzKPIa4nPFmKe0l4ghxatZnBdFba49W8z6k1Xp/IMddSG0nMTPnr+ge/uVsk1eW1vVZxE6m\nGOc+Yuf9cintK5lvErHzW5T5flGa59eo/dygWBf/Qu3Mv0g7l9gBl9vn/FK+YudV9Mwqzij+m9i2\n3qB2H2cGLX8m8Dy1G+pFWpGv6M5c9NqaWhpnCnE2VNRzZS7fp6i1l8XEjrOoZzHP5Z8RrCS2tfJ2\n+zIRyF8tlX850eu1KGtFrpNJ1NriZKJr+DxqP0G4hLgEWsz/EqJr/4JcL0U3+iIILMu0+4n9RDG/\ni3LdzM/pzKfWA3A5tf
3Tn4n2WnSfLvZRK2jZhsvLYDmxPRbLrehy/aOsz4r8ezzRc6/oTTmLaFPF\nve55wFnZ7s4mrkKML9La++hJCCIiUoluew9IRESqpQAkIiKVUAASEZFKKACJiEglFIBERKQSCkAi\nnYiZ9Tez59YwzqFmdteGqpNIoygAiYhIJRSARDqfnmY2LB9QO9zM3mVmA8zsJTP7M/G4F5GNngKQ\nSOezN/EqkA8Sv3z/BvGcwOOI53ntXGHdRNYbBSCRzmeKuz+U339DPBNworu/6vHokt9UVzWR9UcB\nSKTzaf18rG3bSBPZ6CkAiXQ+/czs4/n9NOLBr7ub2Z6lNJGNngKQSOfzIjDIzMYR7365gnjL7d3Z\nCeH1Kisnsr7oadgiIlIJnQGJiEglFIBERKQSCkAiIlIJBSAREamEApCIiFRCAUhERCqhACQiIpVQ\nABIRkUr8f6Ob9/GIWbWoAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x24466a186a0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Frequency of each distinct value in the 'bd' column of the merged training frame.\n",
    "# The Series is passed as `x=` because seaborn >= 0.12 accepts only `data` positionally,\n",
    "# so a bare positional Series is no longer interpreted as the x variable.\n",
    "sns.countplot(x=df_train_merged['bd']);\n",
    "plt.xlabel('bd');\n",
    "plt.ylabel('Number of occurrences');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Membership-duration helper\n",
    "def registration_time(registration_init_time, expiration_date):\n",
    "    \"\"\"Return the number of days from registration to expiration.\n",
    "\n",
    "    Both arguments are read as calendar dates written as YYYYMMDD numbers\n",
    "    (e.g. 20110820 or 20110820.0).\n",
    "    NOTE(review): the YYYYMMDD encoding is assumed from the members data --\n",
    "    confirm before relying on it.\n",
    "\n",
    "    Returns np.nan when either value is missing or does not form a valid date.\n",
    "    Raises ValueError (from int()) when a value is not numeric.\n",
    "    \"\"\"\n",
    "    if pd.isnull(registration_init_time) or pd.isnull(expiration_date):\n",
    "        return np.nan\n",
    "    # int() drops the '.0' that str() leaves on float input, so the text is exactly eight digits\n",
    "    start = pd.to_datetime(str(int(registration_init_time)), format='%Y%m%d', errors='coerce')\n",
    "    end = pd.to_datetime(str(int(expiration_date)), format='%Y%m%d', errors='coerce')\n",
    "    if pd.isnull(start) or pd.isnull(end):\n",
    "        return np.nan\n",
    "    # expiration minus registration, so an ordinary membership yields a positive length\n",
    "    return (end - start).days"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Training interactions joined (inner) with the song metadata on song_id\n",
    "listen_log = df_train[['msno', 'song_id', 'target']].merge(df_songs, on='song_id')\n",
    "\n",
    "# One row per song: mean of `target` -> repeat_play_chance, row count -> plays\n",
    "listen_log_groupby = (\n",
    "    listen_log.groupby('song_id')['target']\n",
    "    .agg(['mean', 'count'])\n",
    "    .reset_index()\n",
    ")\n",
    "listen_log_groupby.columns = ['song_id', 'repeat_play_chance', 'plays']\n",
    "\n",
    "# Attach the song metadata again and derive mean * count per song\n",
    "# (equal to the per-song sum of `target`, up to floating-point rounding)\n",
    "song_data = listen_log_groupby.merge(df_songs, on='song_id').assign(\n",
    "    repeat_events=lambda songs: songs['repeat_play_chance'] * songs['plays']\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13973"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Largest value in the `plays` column of song_data (the per-song row count)\n",
    "song_data['plays'].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "359914"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of non-null entries in the `plays` column of song_data\n",
    "song_data['plays'].count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
